Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions clang/include/clang/Basic/BuiltinsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,8 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in {
def cmpsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant char)">;
}


let Features = "sse3" in {
let Features = "sse3",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
foreach Op = ["addsub"] in {
def Op#ps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>)">;
def Op#pd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>)">;
Expand Down Expand Up @@ -121,7 +121,8 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in {
}

// AVX
let Attributes = [Const, NoThrow, RequiredVectorWidth<256>], Features = "avx" in {
let Attributes = [Const, NoThrow, Constexpr, RequiredVectorWidth<256>],
Features = "avx" in {
foreach Op = ["addsub", "max", "min"] in {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This also affects the max/min builtins - you need to split off the addsub builtins before you make them constexpr (maybe move them further down, into the block with movmskpd256 etc.?).

def Op#pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>)">;
def Op#ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>)">;
Expand Down
30 changes: 30 additions & 0 deletions clang/lib/AST/ByteCode/InterpBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4279,6 +4279,36 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
F.subtract(RHS, RM);
return F;
});
case clang::X86::BI__builtin_ia32_addsubpd:
case clang::X86::BI__builtin_ia32_addsubps:
case clang::X86::BI__builtin_ia32_addsubpd256:
case clang::X86::BI__builtin_ia32_addsubps256: {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use a static function like everything else.

// Addsub: alternates between subtraction and addition
// Result[i] = (i % 2 == 0) ? (a[i] - b[i]) : (a[i] + b[i])
const Pointer &RHS = S.Stk.pop<Pointer>();
const Pointer &LHS = S.Stk.pop<Pointer>();
const Pointer &Dst = S.Stk.peek<Pointer>();
FPOptions FPO = Call->getFPFeaturesInEffect(S.Ctx.getLangOpts());
llvm::RoundingMode RM = getRoundingMode(FPO);
const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>();
unsigned NumElts = VT->getNumElements();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
unsigned NumElts = VT->getNumElements();
unsigned NumElems = VT->getNumElements();


using T = PrimConv<PT_Float>::T;
for (unsigned I = 0; I < NumElts; ++I) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
for (unsigned I = 0; I < NumElts; ++I) {
for (unsigned I = 0; I != NumElts; ++I) {

APFloat LElem = LHS.elem<T>(I).getAPFloat();
APFloat RElem = RHS.elem<T>(I).getAPFloat();
if (I % 2 == 0) {
// Even indices: subtract
LElem.subtract(RElem, RM);
} else {
// Odd indices: add
LElem.add(RElem, RM);
}
Dst.elem<T>(I) = static_cast<T>(LElem);
}
Dst.initializeAllElements();
return true;
}

case clang::X86::BI__builtin_ia32_pmuldq128:
case clang::X86::BI__builtin_ia32_pmuldq256:
Expand Down
29 changes: 29 additions & 0 deletions clang/lib/AST/ExprConstant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13383,6 +13383,35 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
}
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
case clang::X86::BI__builtin_ia32_addsubpd:
case clang::X86::BI__builtin_ia32_addsubps:
case clang::X86::BI__builtin_ia32_addsubpd256:
case clang::X86::BI__builtin_ia32_addsubps256: {
  // Constant-evaluate the ADDSUB builtins lane by lane:
  //   even lanes -> a[i] - b[i]
  //   odd lanes  -> a[i] + b[i]
  // Evaluation fails (returns false) if either operand cannot be evaluated
  // as an rvalue.
  APValue LHSVec, RHSVec;
  if (!EvaluateAsRValue(Info, E->getArg(0), LHSVec) ||
      !EvaluateAsRValue(Info, E->getArg(1), RHSVec))
    return false;

  // The lane count is taken from the first operand.
  const unsigned NumLanes = LHSVec.getVectorLength();
  SmallVector<APValue, 8> Lanes;
  Lanes.reserve(NumLanes);
  // Every lane is rounded with the rounding mode active for this call.
  const llvm::RoundingMode Rounding = getActiveRoundingMode(getEvalInfo(), E);

  for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
    // Work on a copy of the left lane; the arithmetic updates it in place.
    APFloat Acc = LHSVec.getVectorElt(Lane).getFloat();
    const APFloat &Operand = RHSVec.getVectorElt(Lane).getFloat();
    const bool IsOddLane = (Lane % 2) != 0;
    if (IsOddLane)
      Acc.add(Operand, Rounding);
    else
      Acc.subtract(Operand, Rounding);
    Lanes.push_back(APValue(Acc));
  }
  return Success(APValue(Lanes.data(), Lanes.size()), E);
}
case Builtin::BI__builtin_elementwise_fshl:
case Builtin::BI__builtin_elementwise_fshr: {
APValue SourceHi, SourceLo, SourceShift;
Expand Down
22 changes: 14 additions & 8 deletions clang/lib/Headers/avx512fintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -8383,24 +8383,30 @@ _mm512_maskz_moveldup_ps(__mmask16 __U, __m512 __A) {
(__v16sf)_mm512_setzero_ps());
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_move_ss(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_move_ss(__m128 __W,
__mmask8 __U,
__m128 __A,
__m128 __B) {
return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B), __W);
}

static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_move_ss(__mmask8 __U, __m128 __A, __m128 __B) {
static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_move_ss(__mmask8 __U,
__m128 __A,
__m128 __B) {
return __builtin_ia32_selectss_128(__U, _mm_move_ss(__A, __B),
_mm_setzero_ps());
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_move_sd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_move_sd(__m128d __W,
__mmask8 __U,
__m128d __A,
__m128d __B) {
return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B), __W);
}

static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_move_sd(__mmask8 __U, __m128d __A, __m128d __B) {
static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_move_sd(__mmask8 __U,
__m128d __A,
__m128d __B) {
return __builtin_ia32_selectsd_128(__U, _mm_move_sd(__A, __B),
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pull this out into its own PR.

_mm_setzero_pd());
}
Expand Down
10 changes: 4 additions & 6 deletions clang/lib/Headers/avxintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -147,9 +147,8 @@ static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_sub_ps(__m256 __a,
/// A 256-bit vector of [4 x double] containing the right source operand.
/// \returns A 256-bit vector of [4 x double] containing the alternating sums
/// and differences between both operands.
static __inline __m256d __DEFAULT_FN_ATTRS
_mm256_addsub_pd(__m256d __a, __m256d __b)
{
static __inline __m256d __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_addsub_pd(__m256d __a, __m256d __b) {
return (__m256d)__builtin_ia32_addsubpd256((__v4df)__a, (__v4df)__b);
}

Expand All @@ -166,9 +165,8 @@ _mm256_addsub_pd(__m256d __a, __m256d __b)
/// A 256-bit vector of [8 x float] containing the right source operand.
/// \returns A 256-bit vector of [8 x float] containing the alternating sums and
/// differences between both operands.
static __inline __m256 __DEFAULT_FN_ATTRS
_mm256_addsub_ps(__m256 __a, __m256 __b)
{
static __inline __m256 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_addsub_ps(__m256 __a, __m256 __b) {
return (__m256)__builtin_ia32_addsubps256((__v8sf)__a, (__v8sf)__b);
}

Expand Down
7 changes: 3 additions & 4 deletions clang/lib/Headers/pmmintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,8 @@ _mm_lddqu_si128(__m128i_u const *__p)
/// A 128-bit vector of [4 x float] containing the right source operand.
/// \returns A 128-bit vector of [4 x float] containing the alternating sums and
/// differences of both operands.
static __inline__ __m128 __DEFAULT_FN_ATTRS
_mm_addsub_ps(__m128 __a, __m128 __b)
{
static __inline__ __m128 __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_addsub_ps(__m128 __a, __m128 __b) {
return __builtin_ia32_addsubps((__v4sf)__a, (__v4sf)__b);
}

Expand Down Expand Up @@ -166,7 +165,7 @@ _mm_moveldup_ps(__m128 __a)
/// A 128-bit vector of [2 x double] containing the right source operand.
/// \returns A 128-bit vector of [2 x double] containing the alternating sums
/// and differences of both operands.
static __inline__ __m128d __DEFAULT_FN_ATTRS
static __inline__ __m128d __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_addsub_pd(__m128d __a, __m128d __b) {
return __builtin_ia32_addsubpd((__v2df)__a, (__v2df)__b);
}
Expand Down
2 changes: 2 additions & 0 deletions clang/test/CodeGen/X86/avx-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,12 +45,14 @@ __m256d test_mm256_addsub_pd(__m256d A, __m256d B) {
// CHECK: call {{.*}}<4 x double> @llvm.x86.avx.addsub.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}})
return _mm256_addsub_pd(A, B);
}
TEST_CONSTEXPR(match_m256d(_mm256_addsub_pd((__m256d){+1.0, +2.0, +3.0, +4.0}, (__m256d){+1.0, +1.0, +1.0, +1.0}), +0.0, +3.0, +2.0, +5.0));

// Codegen test for _mm256_addsub_ps: the wrapper is expected to emit a call
// to the llvm.x86.avx.addsub.ps.256 intrinsic taking two <8 x float> operands.
__m256 test_mm256_addsub_ps(__m256 A, __m256 B) {
// CHECK-LABEL: test_mm256_addsub_ps
// CHECK: call {{.*}}<8 x float> @llvm.x86.avx.addsub.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}})
return _mm256_addsub_ps(A, B);
}
TEST_CONSTEXPR(match_m256(_mm256_addsub_ps((__m256){+1.0f, +2.0f, +3.0f, +4.0f, +5.0f, +6.0f, +7.0f, +8.0f}, (__m256){+1.0f, +1.0f, +1.0f, +1.0f, +1.0f, +1.0f, +1.0f, +1.0f}), +0.0f, +3.0f, +2.0f, +5.0f, +4.0f, +7.0f, +6.0f, +9.0f));

__m256d test_mm256_and_pd(__m256d A, __m256d B) {
// CHECK-LABEL: test_mm256_and_pd
Expand Down
2 changes: 2 additions & 0 deletions clang/test/CodeGen/X86/sse3-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,14 @@ __m128d test_mm_addsub_pd(__m128d A, __m128d B) {
// CHECK: call {{.*}}<2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}})
return _mm_addsub_pd(A, B);
}
TEST_CONSTEXPR(match_m128d(_mm_addsub_pd((__m128d){+2.0, +2.0}, (__m128d){+1.0, +2.0}), +1.0, +4.0));

// Codegen test for _mm_addsub_ps: the wrapper is expected to emit a call to
// the llvm.x86.sse3.addsub.ps intrinsic taking two <4 x float> operands.
__m128 test_mm_addsub_ps(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_addsub_ps
// CHECK: call {{.*}}<4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
return _mm_addsub_ps(A, B);
}
TEST_CONSTEXPR(match_m128(_mm_addsub_ps((__m128){+3.0f, +4.0f, +5.0f, +6.0f}, (__m128){+1.0f, +2.0f, +3.0f, +4.0f}), +2.0f, +6.0f, +2.0f, +10.0f));

__m128d test_mm_hadd_pd(__m128d A, __m128d B) {
// CHECK-LABEL: test_mm_hadd_pd
Expand Down
Loading