Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions clang/include/clang/Basic/BuiltinsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -1072,24 +1072,24 @@ let Features = "avx512f", Attributes = [NoThrow, RequiredVectorWidth<512>] in {
def storeaps512_mask : X86Builtin<"void(_Vector<16, float *>, _Vector<16, float>, unsigned short)">;
}

let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def alignq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int)">;
def alignd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Constant int)">;
}

let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def alignd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Constant int)">;
}

let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def alignd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">;
}

let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def alignq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Constant int)">;
}

let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def alignq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">;
}

Expand Down
18 changes: 18 additions & 0 deletions clang/lib/AST/ByteCode/InterpBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4774,6 +4774,24 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return std::pair<unsigned, int>{VecIdx, ElemIdx};
});

case X86::BI__builtin_ia32_alignd128:
case X86::BI__builtin_ia32_alignd256:
case X86::BI__builtin_ia32_alignd512:
case X86::BI__builtin_ia32_alignq128:
case X86::BI__builtin_ia32_alignq256:
case X86::BI__builtin_ia32_alignq512: {
unsigned NumElems = Call->getType()->castAs<VectorType>()->getNumElements();
return interp__builtin_ia32_shuffle_generic(
S, OpPC, Call, [NumElems](unsigned DstIdx, unsigned Shift) {
unsigned Imm = Shift & 0xFF;
unsigned EffectiveShift = Imm & (NumElems - 1);
unsigned SourcePos = DstIdx + EffectiveShift;
unsigned VecIdx = SourcePos < NumElems ? 1u : 0u;
unsigned ElemIdx = SourcePos & (NumElems - 1);
return std::pair<unsigned, int>{VecIdx, static_cast<int>(ElemIdx)};
});
}

default:
S.FFDiag(S.Current->getLocation(OpPC),
diag::note_invalid_subexpr_in_const_expr)
Expand Down
22 changes: 22 additions & 0 deletions clang/lib/AST/ExprConstant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13551,6 +13551,28 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return false;
return Success(R, E);
}
case X86::BI__builtin_ia32_alignd128:
case X86::BI__builtin_ia32_alignd256:
case X86::BI__builtin_ia32_alignd512:
case X86::BI__builtin_ia32_alignq128:
case X86::BI__builtin_ia32_alignq256:
case X86::BI__builtin_ia32_alignq512: {
APValue R;
unsigned NumElems = E->getType()->castAs<VectorType>()->getNumElements();
if (!evalShuffleGeneric(
Info, E, R, [NumElems](unsigned DstIdx, unsigned Shift) {
unsigned Imm = Shift & 0xFF;
unsigned EffectiveShift = Imm & (NumElems - 1);
unsigned SourcePos = DstIdx + EffectiveShift;
unsigned VecIdx = SourcePos < NumElems ? 1 : 0;
unsigned ElemIdx = SourcePos & (NumElems - 1);

return std::pair<unsigned, int>{VecIdx,
static_cast<int>(ElemIdx)};
}))
return false;
return Success(R, E);
}
case X86::BI__builtin_ia32_permvarsi256:
case X86::BI__builtin_ia32_permvarsf256:
case X86::BI__builtin_ia32_permvardf512:
Expand Down
34 changes: 34 additions & 0 deletions clang/test/CodeGen/X86/avx512f-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -519,6 +519,40 @@ __m512i test_mm512_maskz_alignr_epi64( __mmask8 u, __m512i a, __m512i b)
return _mm512_maskz_alignr_epi64(u, a, b, 2);
}

TEST_CONSTEXPR(match_v16si(_mm512_alignr_epi32(((__m512i)(__v16si){100, 200, 300, 400, 500, 600, 700, 800,
900, 1000, 1100, 1200, 1300, 1400, 1500, 1600}),
((__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15}), 19),
3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 100, 200, 300));
TEST_CONSTEXPR(match_v16si(_mm512_mask_alignr_epi32(((__m512i)(__v16si){1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000,
9000, 10000, 11000, 12000, 13000, 14000, 15000, 16000}),
0xA5A5,
((__m512i)(__v16si){100, 200, 300, 400, 500, 600, 700, 800,
900, 1000, 1100, 1200, 1300, 1400, 1500, 1600}),
((__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15}), 19),
3, 2000, 5, 4000, 5000, 8, 7000, 10,
11, 10000, 13, 12000, 13000, 100, 15000, 300));
TEST_CONSTEXPR(match_v16si(_mm512_maskz_alignr_epi32(0x0F0F,
((__m512i)(__v16si){100, 200, 300, 400, 500, 600, 700, 800,
900, 1000, 1100, 1200, 1300, 1400, 1500, 1600}),
((__m512i)(__v16si){0, 1, 2, 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15}), 19),
3, 4, 5, 6, 0, 0, 0, 0, 11, 12, 13, 14, 0, 0, 0, 0));

TEST_CONSTEXPR(match_v8di(_mm512_alignr_epi64(((__m512i)(__v8di){10, 11, 12, 13, 14, 15, 16, 17}),
((__m512i)(__v8di){1, 2, 3, 4, 5, 6, 7, 8}), 11),
4, 5, 6, 7, 8, 10, 11, 12));
TEST_CONSTEXPR(match_v8di(_mm512_mask_alignr_epi64(((__m512i)(__v8di){1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000}),
0xA5,
((__m512i)(__v8di){10, 11, 12, 13, 14, 15, 16, 17}),
((__m512i)(__v8di){1, 2, 3, 4, 5, 6, 7, 8}), 11),
4, 2000, 6, 4000, 5000, 10, 7000, 12));
TEST_CONSTEXPR(match_v8di(_mm512_maskz_alignr_epi64(0x33,
((__m512i)(__v8di){10, 11, 12, 13, 14, 15, 16, 17}),
((__m512i)(__v8di){1, 2, 3, 4, 5, 6, 7, 8}), 11),
4, 5, 0, 0, 8, 10, 0, 0));

__m512d test_mm512_fmadd_round_pd(__m512d __A, __m512d __B, __m512d __C) {
// CHECK-LABEL: test_mm512_fmadd_round_pd
// CHECK: @llvm.x86.avx512.vfmadd.pd.512
Expand Down
47 changes: 47 additions & 0 deletions clang/test/CodeGen/X86/avx512vl-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -10518,6 +10518,53 @@ __m256i test_mm256_maskz_alignr_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
return _mm256_maskz_alignr_epi64(__U, __A, __B, 1);
}

TEST_CONSTEXPR(match_v4si(_mm_alignr_epi32(((__m128i)(__v4si){100, 200, 300, 400}),
((__m128i)(__v4si){10, 20, 30, 40}), 1),
20, 30, 40, 100));
TEST_CONSTEXPR(match_v4si(_mm_mask_alignr_epi32(((__m128i)(__v4si){1000, 2000, 3000, 4000}), 0x5,
((__m128i)(__v4si){100, 200, 300, 400}),
((__m128i)(__v4si){10, 20, 30, 40}), 1),
20, 2000, 40, 4000));
TEST_CONSTEXPR(match_v4si(_mm_maskz_alignr_epi32(0x3,
((__m128i)(__v4si){100, 200, 300, 400}),
((__m128i)(__v4si){10, 20, 30, 40}), 1),
20, 30, 0, 0));

TEST_CONSTEXPR(match_v8si(_mm256_alignr_epi32(((__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800}),
((__m256i)(__v8si){1, 2, 3, 4, 5, 6, 7, 8}), 3),
4, 5, 6, 7, 8, 100, 200, 300));
TEST_CONSTEXPR(match_v8si(_mm256_mask_alignr_epi32(((__m256i)(__v8si){1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000}),
0xA5,
((__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800}),
((__m256i)(__v8si){1, 2, 3, 4, 5, 6, 7, 8}), 3),
4, 2000, 6, 4000, 5000, 100, 7000, 300));
TEST_CONSTEXPR(match_v8si(_mm256_maskz_alignr_epi32(0x33,
((__m256i)(__v8si){100, 200, 300, 400, 500, 600, 700, 800}),
((__m256i)(__v8si){1, 2, 3, 4, 5, 6, 7, 8}), 3),
4, 5, 0, 0, 8, 100, 0, 0));

TEST_CONSTEXPR(match_v2di(_mm_alignr_epi64(((__m128i)(__v2di){10, 11}), ((__m128i)(__v2di){1, 2}), 1), 2, 10));
TEST_CONSTEXPR(match_v2di(_mm_mask_alignr_epi64(((__m128i)(__v2di){1000, 2000}), 0x1,
((__m128i)(__v2di){10, 11}),
((__m128i)(__v2di){1, 2}), 1),
2, 2000));
TEST_CONSTEXPR(match_v2di(_mm_maskz_alignr_epi64(0x2,
((__m128i)(__v2di){10, 11}),
((__m128i)(__v2di){1, 2}), 1),
0, 10));

TEST_CONSTEXPR(match_v4di(_mm256_alignr_epi64(((__m256i)(__v4di){10, 11, 12, 13}),
((__m256i)(__v4di){1, 2, 3, 4}), 2),
3, 4, 10, 11));
TEST_CONSTEXPR(match_v4di(_mm256_mask_alignr_epi64(((__m256i)(__v4di){1000, 2000, 3000, 4000}), 0x5,
((__m256i)(__v4di){10, 11, 12, 13}),
((__m256i)(__v4di){1, 2, 3, 4}), 2),
3, 2000, 10, 4000));
TEST_CONSTEXPR(match_v4di(_mm256_maskz_alignr_epi64(0xA,
((__m256i)(__v4di){10, 11, 12, 13}),
((__m256i)(__v4di){1, 2, 3, 4}), 2),
0, 4, 0, 11));

__m128 test_mm_mask_movehdup_ps(__m128 __W, __mmask8 __U, __m128 __A) {
// CHECK-LABEL: test_mm_mask_movehdup_ps
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
Expand Down
Loading