-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[X86] Handle undef/zero/ones cases after modifying Ops and Masks #158428
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-x86 Author: Hongyu Chen (XChy) ChangesFixes #158415. Full diff: https://github.com/llvm/llvm-project/pull/158428.diff 2 Files Affected:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 3631016b0f5c7..0699ebf6f6f88 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -41567,6 +41567,17 @@ static SDValue combineX86ShufflesRecursively(
resolveTargetShuffleInputsAndMask(Ops, Mask);
}
+ // Handle the all undef/zero/ones cases.
+ if (all_of(Mask, [](int Idx) { return Idx == SM_SentinelUndef; }))
+ return DAG.getUNDEF(RootVT);
+ if (all_of(Mask, [](int Idx) { return Idx < 0; }))
+ return getZeroVector(RootVT, Subtarget, DAG, DL);
+ if (Ops.size() == 1 && ISD::isBuildVectorAllOnes(Ops[0].getNode()) &&
+ !llvm::is_contained(Mask, SM_SentinelZero))
+ return getOnesVector(RootVT, DAG, DL);
+
+ assert(!Ops.empty() && "Shuffle with no inputs detected");
+
// We can only combine unary and binary shuffle mask cases.
if (Ops.size() <= 2) {
// Minor canonicalization of the accumulated shuffle mask to make it easier
diff --git a/llvm/test/CodeGen/X86/pr158415.ll b/llvm/test/CodeGen/X86/pr158415.ll
new file mode 100644
index 0000000000000..2fbbb90129826
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr158415.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64 -mattr=+avx2 | FileCheck %s
+
+define <32 x i16> @test(<8 x i8> %arg) {
+; CHECK-LABEL: test:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[u,u],zero,xmm0[u,u,u,0,2,u,u,u,u,u,u,u,4]
+; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,0,2,1]
+; CHECK-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,u,24],zero,ymm0[25],zero,ymm0[30],zero,ymm0[31],zero,ymm0[u,u,u,u,u,u,u,u]
+; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
+; CHECK-NEXT: vpermq {{.*#+}} ymm0 = ymm0[3,2,2,3]
+; CHECK-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[12,13,14,15],zero,zero,ymm1[4,5,u,u,u,u,u,u,u,u,28,29,30,31],zero,zero,ymm1[20,21],zero,zero,ymm1[26,27,28,29,30,31]
+; CHECK-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,3,0,2]
+; CHECK-NEXT: vpbroadcastw {{.*#+}} ymm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; CHECK-NEXT: vpxor %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpxor %xmm2, %xmm0, %xmm2
+; CHECK-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,xmm2[0,1,2,3,4,5,6,7,8,9,10,11,12,13]
+; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; CHECK-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; CHECK-NEXT: vpbroadcastw %xmm1, %ymm3
+; CHECK-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0,1],ymm3[2],ymm0[3,4,5,6,7,8,9],ymm3[10],ymm0[11,12,13,14,15]
+; CHECK-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm3[0,1,2,3],ymm0[4,5,6,7]
+; CHECK-NEXT: vpshufb {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,xmm2[14,15],zero,zero,zero,zero,xmm2[u,u],zero,zero
+; CHECK-NEXT: vextracti128 $1, %ymm1, %xmm1
+; CHECK-NEXT: vpmovzxwd {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
+; CHECK-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0,1,2,3,4,5],xmm1[6],xmm2[7]
+; CHECK-NEXT: retq
+entry:
+ %shuffle2 = shufflevector <8 x i8> %arg, <8 x i8> zeroinitializer, <32 x i32> <i32 2, i32 2, i32 9, i32 3, i32 1, i32 0, i32 0, i32 2, i32 0, i32 5, i32 9, i32 6, i32 5, i32 4, i32 7, i32 2, i32 7, i32 9, i32 4, i32 0, i32 9, i32 2, i32 4, i32 3, i32 3, i32 2, i32 2, i32 3, i32 9, i32 0, i32 6, i32 4>
+ %conv3 = zext <32 x i8> %shuffle2 to <32 x i16>
+ %shuffle4 = shufflevector <32 x i16> zeroinitializer, <32 x i16> %conv3, <32 x i32> <i32 5, i32 3, i32 4, i32 47, i32 5, i32 5, i32 3, i32 63, i32 4, i32 4, i32 60, i32 2, i32 2, i32 5, i32 4, i32 0, i32 38, i32 1, i32 0, i32 3, i32 59, i32 2, i32 3, i32 1, i32 1, i32 0, i32 3, i32 34, i32 0, i32 0, i32 62, i32 5>
+ %not = xor <32 x i16> %shuffle4, splat (i16 1)
+ %shuffle5 = shufflevector <32 x i16> zeroinitializer, <32 x i16> %not, <32 x i32> <i32 3, i32 9, i32 3, i32 1, i32 9, i32 8, i32 9, i32 2, i32 0, i32 8, i32 48, i32 8, i32 35, i32 3, i32 0, i32 4, i32 4, i32 7, i32 4, i32 39, i32 9, i32 0, i32 59, i32 6, i32 0, i32 4, i32 9, i32 1, i32 1, i32 2, i32 8, i32 9>
+ ret <32 x i16> %shuffle5
+}
+
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM - cheers
…m#158428) Fixes llvm#158415. After `resolveTargetShuffleInputsAndMask` and other modifications on `Ops` and `Mask`, unused inputs in `Ops` are erased, and may leave `Ops` empty. This patch handles such cases before calling the final `combineX86ShuffleChain`。
Fixes #158415.
After
resolveTargetShuffleInputsAndMask
and other modifications onOps
andMask
, unused inputs inOps
are erased, and may leaveOps
empty. This patch handles such cases before calling the finalcombineX86ShuffleChain
。