Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 7 additions & 5 deletions clang/include/clang/Basic/BuiltinsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -512,7 +512,7 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in
def vperm2f128_si256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">;
}

let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def vpermilpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Constant int)">;
def vpermilps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Constant int)">;
}
Expand All @@ -528,6 +528,8 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid
def vinsertf128_pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<2, double>, _Constant int)">;
def vinsertf128_ps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<4, float>, _Constant int)">;
def vinsertf128_si256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>, _Constant int)">;
def vpermilpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">;
def vpermilps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Constant int)">;

foreach Op = ["hadd", "hsub"] in {
def Op#pd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, double>)">;
Expand All @@ -536,8 +538,6 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid
}

let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
def vpermilpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">;
def vpermilps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Constant int)">;
def sqrtpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>)">;
def sqrtps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>)">;
def rsqrtps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>)">;
Expand Down Expand Up @@ -2375,10 +2375,12 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>
def vcvttss2si32 : X86Builtin<"int(_Vector<4, float>, _Constant int)">;
def vcvttss2usi32 : X86Builtin<"unsigned int(_Vector<4, float>, _Constant int)">;
}

let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def vpermilpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Constant int)">;
def vpermilps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Constant int)">;
}

let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
def vpermilvarpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, long long int>)">;
def vpermilvarps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, int>)">;
}
Expand Down
19 changes: 19 additions & 0 deletions clang/lib/AST/ByteCode/InterpBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4620,6 +4620,9 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_pshufd:
case X86::BI__builtin_ia32_pshufd256:
case X86::BI__builtin_ia32_pshufd512:
case X86::BI__builtin_ia32_vpermilps:
case X86::BI__builtin_ia32_vpermilps256:
case X86::BI__builtin_ia32_vpermilps512:
return interp__builtin_ia32_shuffle_generic(
S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
unsigned LaneBase = (DstIdx / 4) * 4;
Expand All @@ -4628,6 +4631,22 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return std::make_pair(0, static_cast<int>(LaneBase + Sel));
});

case X86::BI__builtin_ia32_vpermilpd:
case X86::BI__builtin_ia32_vpermilpd256:
case X86::BI__builtin_ia32_vpermilpd512:
return interp__builtin_ia32_shuffle_generic(
S, OpPC, Call, [](unsigned DstIdx, unsigned Control) {
unsigned NumElemPerLane = 2;
unsigned BitsPerElem = 1;
unsigned MaskBits = 8;
unsigned IndexMask = 0x1;
unsigned Lane = DstIdx / NumElemPerLane;
unsigned LaneOffset = Lane * NumElemPerLane;
unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;
unsigned Index = (Control >> BitIndex) & IndexMask;
return std::make_pair(0, static_cast<int>(LaneOffset + Index));
});

case X86::BI__builtin_ia32_kandqi:
case X86::BI__builtin_ia32_kandhi:
case X86::BI__builtin_ia32_kandsi:
Expand Down
24 changes: 23 additions & 1 deletion clang/lib/AST/ExprConstant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13023,7 +13023,10 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {

case X86::BI__builtin_ia32_pshufd:
case X86::BI__builtin_ia32_pshufd256:
case X86::BI__builtin_ia32_pshufd512: {
case X86::BI__builtin_ia32_pshufd512:
case X86::BI__builtin_ia32_vpermilps:
case X86::BI__builtin_ia32_vpermilps256:
case X86::BI__builtin_ia32_vpermilps512: {
APValue R;
if (!evalShuffleGeneric(
Info, E, R,
Expand All @@ -13040,6 +13043,25 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(R, E);
}

case X86::BI__builtin_ia32_vpermilpd:
case X86::BI__builtin_ia32_vpermilpd256:
case X86::BI__builtin_ia32_vpermilpd512: {
APValue R;
if (!evalShuffleGeneric(Info, E, R, [](unsigned DstIdx, unsigned Control) {
unsigned NumElemPerLane = 2;
unsigned BitsPerElem = 1;
unsigned MaskBits = 8;
unsigned IndexMask = 0x1;
unsigned Lane = DstIdx / NumElemPerLane;
unsigned LaneOffset = Lane * NumElemPerLane;
unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits;
unsigned Index = (Control >> BitIndex) & IndexMask;
return std::make_pair(0, static_cast<int>(LaneOffset + Index));
}))
return false;
return Success(R, E);
}

case X86::BI__builtin_ia32_phminposuw128: {
APValue Source;
if (!Evaluate(Source, Info, E->getArg(0)))
Expand Down
5 changes: 5 additions & 0 deletions clang/test/CodeGen/X86/avx-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -1400,31 +1400,36 @@ __m128d test_mm_permute_pd(__m128d A) {
// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <2 x i32> <i32 1, i32 0>
return _mm_permute_pd(A, 1);
}
TEST_CONSTEXPR(match_m128d(_mm_permute_pd(((__m128d){1.0, 2.0}), 1), 2.0, 1.0));

__m256d test_mm256_permute_pd(__m256d A) {
// CHECK-LABEL: test_mm256_permute_pd
// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
return _mm256_permute_pd(A, 5);
}
TEST_CONSTEXPR(match_m256d(_mm256_permute_pd(((__m256d){1.0f, 2.0f, 3.0f, 4.0f}), 5), 2.0f, 1.0f, 4.0f, 3.0f));

__m128 test_mm_permute_ps(__m128 A) {
// CHECK-LABEL: test_mm_permute_ps
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
return _mm_permute_ps(A, 0x1b);
}
TEST_CONSTEXPR(match_m128(_mm_permute_ps(((__m128){1.0, 2.0, 3.0, 4.0}), 0x1b), 4.0, 3.0, 2.0, 1.0));

// Test case for PR12401
__m128 test2_mm_permute_ps(__m128 a) {
// CHECK-LABEL: test2_mm_permute_ps
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> poison, <4 x i32> <i32 2, i32 1, i32 2, i32 3>
return _mm_permute_ps(a, 0xe6);
}
TEST_CONSTEXPR(match_m128(_mm_permute_ps(((__m128){1.0, 2.0, 3.0, 4.0}), 0xe6), 3.0, 2.0, 3.0, 4.0));

__m256 test_mm256_permute_ps(__m256 A) {
// CHECK-LABEL: test_mm256_permute_ps
// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
return _mm256_permute_ps(A, 0x1b);
}
TEST_CONSTEXPR(match_m256(_mm256_permute_ps(((__m256){1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}), 0x1b), 4.0, 3.0, 2.0, 1.0, 8.0, 7.0, 6.0, 5.0));

__m256d test_mm256_permute2f128_pd(__m256d A, __m256d B) {
// CHECK-LABEL: test_mm256_permute2f128_pd
Expand Down
32 changes: 32 additions & 0 deletions clang/test/CodeGen/X86/avx512f-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -5516,40 +5516,72 @@ __m512d test_mm512_permute_pd(__m512d __X) {
// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
return _mm512_permute_pd(__X, 2);
}
TEST_CONSTEXPR(match_m512d(_mm512_permute_pd(((__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}), 2), 0.0, 1.0, 2.0, 2.0, 4.0, 4.0, 6.0, 6.0));

__m512d test_mm512_mask_permute_pd(__m512d __W, __mmask8 __U, __m512d __X) {
// CHECK-LABEL: test_mm512_mask_permute_pd
// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_mask_permute_pd(__W, __U, __X, 2);
}
TEST_CONSTEXPR(match_m512d(_mm512_mask_permute_pd(
((__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
(__mmask8)0b01010100,
((__m512d){8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}),
2),
0.0, 1.0, 10.0, 3.0, 12.0, 5.0, 14.0, 7.0
));

__m512d test_mm512_maskz_permute_pd(__mmask8 __U, __m512d __X) {
// CHECK-LABEL: test_mm512_maskz_permute_pd
// CHECK: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
// CHECK: select <8 x i1> %{{.*}}, <8 x double> %{{.*}}, <8 x double> %{{.*}}
return _mm512_maskz_permute_pd(__U, __X, 2);
}
TEST_CONSTEXPR(match_m512d(_mm512_maskz_permute_pd(
(__mmask8)0b01010100,
((__m512d){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
2),
0.0, 0.0, 2.0, 0.0, 4.0, 0.0, 6.0, 0.0
));

__m512 test_mm512_permute_ps(__m512 __X) {
// CHECK-LABEL: test_mm512_permute_ps
// CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> poison, <16 x i32> <i32 2, i32 0, i32 0, i32 0, i32 6, i32 4, i32 4, i32 4, i32 10, i32 8, i32 8, i32 8, i32 14, i32 12, i32 12, i32 12>
return _mm512_permute_ps(__X, 2);
}
TEST_CONSTEXPR(match_m512(_mm512_permute_ps(
((__m512){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}),
2),
2, 0, 0, 0, 6, 4, 4, 4, 10, 8, 8, 8, 14, 12, 12, 12
));

__m512 test_mm512_mask_permute_ps(__m512 __W, __mmask16 __U, __m512 __X) {
// CHECK-LABEL: test_mm512_mask_permute_ps
// CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> poison, <16 x i32> <i32 2, i32 0, i32 0, i32 0, i32 6, i32 4, i32 4, i32 4, i32 10, i32 8, i32 8, i32 8, i32 14, i32 12, i32 12, i32 12>
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_mask_permute_ps(__W, __U, __X, 2);
}
TEST_CONSTEXPR(match_m512(_mm512_mask_permute_ps(
((__m512){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}),
(__mmask16)0b1010101010101010,
((__m512){16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}),
2),
0, 16, 2, 16, 4, 20, 6, 20, 8, 24, 10, 24, 12, 28, 14, 28
));

__m512 test_mm512_maskz_permute_ps(__mmask16 __U, __m512 __X) {
// CHECK-LABEL: test_mm512_maskz_permute_ps
// CHECK: shufflevector <16 x float> %{{.*}}, <16 x float> poison, <16 x i32> <i32 2, i32 0, i32 0, i32 0, i32 6, i32 4, i32 4, i32 4, i32 10, i32 8, i32 8, i32 8, i32 14, i32 12, i32 12, i32 12>
// CHECK: select <16 x i1> %{{.*}}, <16 x float> %{{.*}}, <16 x float> %{{.*}}
return _mm512_maskz_permute_ps(__U, __X, 2);
}
TEST_CONSTEXPR(match_m512(_mm512_maskz_permute_ps(
(__mmask16)0b1010101010101010,
((__m512){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}),
2),
0, 0, 0, 0, 0, 4, 0, 4, 0, 8, 0, 8, 0, 12, 0, 12
));

__m512d test_mm512_permutevar_pd(__m512d __A, __m512i __C) {
// CHECK-LABEL: test_mm512_permutevar_pd
Expand Down
53 changes: 53 additions & 0 deletions clang/test/CodeGen/X86/avx512vl-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -8022,55 +8022,108 @@ __m128d test_mm_mask_permute_pd(__m128d __W, __mmask8 __U, __m128d __X) {
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm_mask_permute_pd(__W, __U, __X, 1);
}
TEST_CONSTEXPR(match_m128d(_mm_mask_permute_pd(
((__m128d){0.0, 1.0}),
(__mmask8)0b10,
((__m128d){2.0, 3.0}),
1),
0.0, 2.0
));

__m128d test_mm_maskz_permute_pd(__mmask8 __U, __m128d __X) {
// CHECK-LABEL: test_mm_maskz_permute_pd
// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <2 x i32> <i32 1, i32 0>
// CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}
return _mm_maskz_permute_pd(__U, __X, 1);
}
TEST_CONSTEXPR(match_m128d(_mm_maskz_permute_pd(
(__mmask8)0b10,
((__m128d){1.0, 2.0}),
1),
0.0, 1.0
));

__m256d test_mm256_mask_permute_pd(__m256d __W, __mmask8 __U, __m256d __X) {
// CHECK-LABEL: test_mm256_mask_permute_pd
// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return _mm256_mask_permute_pd(__W, __U, __X, 5);
}
TEST_CONSTEXPR(match_m256d(_mm256_mask_permute_pd(
((__m256d){0.0, 1.0, 2.0, 3.0}),
(__mmask8)0b1010,
((__m256d){4.0, 5.0, 6.0, 7.0}),
5),
0.0, 4.0, 2.0, 6.0
));

__m256d test_mm256_maskz_permute_pd(__mmask8 __U, __m256d __X) {
// CHECK-LABEL: test_mm256_maskz_permute_pd
// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
// CHECK: select <4 x i1> %{{.*}}, <4 x double> %{{.*}}, <4 x double> %{{.*}}
return _mm256_maskz_permute_pd(__U, __X, 5);
}
TEST_CONSTEXPR(match_m256d(_mm256_maskz_permute_pd(
(__mmask8)0b1010,
((__m256d){4.0, 5.0, 6.0, 7.0}),
5),
0.0, 4.0, 0.0, 6.0
));

__m128 test_mm_mask_permute_ps(__m128 __W, __mmask8 __U, __m128 __X) {
// CHECK-LABEL: test_mm_mask_permute_ps
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm_mask_permute_ps(__W, __U, __X, 0x1b);
}
TEST_CONSTEXPR(match_m128(_mm_mask_permute_ps(
((__m128){0.0, 1.0, 2.0, 3.0}),
(__mmask8)0b1010,
((__m128){4.0, 5.0, 6.0, 7.0}),
0x1b),
0, 6.0, 2.0, 4.0
));

__m128 test_mm_maskz_permute_ps(__mmask8 __U, __m128 __X) {
// CHECK-LABEL: test_mm_maskz_permute_ps
// CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
// CHECK: select <4 x i1> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}
return _mm_maskz_permute_ps(__U, __X, 0x1b);
}
TEST_CONSTEXPR(match_m128(_mm_maskz_permute_ps(
(__mmask8)0b1010,
((__m128){4.0, 5.0, 6.0, 7.0}),
0x1b),
0.0, 6.0, 0.0, 4.0
));


__m256 test_mm256_mask_permute_ps(__m256 __W, __mmask8 __U, __m256 __X) {
// CHECK-LABEL: test_mm256_mask_permute_ps
// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_mask_permute_ps(__W, __U, __X, 0x1b);
}
TEST_CONSTEXPR(match_m256(_mm256_mask_permute_ps(
((__m256){0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}),
(__mmask8)0b10101010,
((__m256){8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}),
0x1b),
0.0, 10.0, 2.0, 8.0, 4.0, 14.0, 6.0, 12.0
));

__m256 test_mm256_maskz_permute_ps(__mmask8 __U, __m256 __X) {
// CHECK-LABEL: test_mm256_maskz_permute_ps
// CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
// CHECK: select <8 x i1> %{{.*}}, <8 x float> %{{.*}}, <8 x float> %{{.*}}
return _mm256_maskz_permute_ps(__U, __X, 0x1b);
}
TEST_CONSTEXPR(match_m256(_mm256_maskz_permute_ps(
(__mmask8)0b10101010,
((__m256){8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0}),
0x1b),
0.0, 10.0, 0.0, 8.0, 0.0, 14.0, 0.0, 12.0
));

__m128d test_mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C) {
// CHECK-LABEL: test_mm_mask_permutevar_pd
Expand Down
Loading