Skip to content

Commit

Permalink
[X86] combineX86ShuffleChain - don't fold to truncate(concat(V1,V2)) …
Browse files Browse the repository at this point in the history
…if it was already a PACK op

Fixes #55050
  • Loading branch information
RKSimon committed Apr 25, 2022
1 parent 16d87ef commit e8305c0
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 1 deletion.
6 changes: 5 additions & 1 deletion llvm/lib/Target/X86/X86ISelLowering.cpp
Expand Up @@ -37801,7 +37801,11 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
(RootVT.is128BitVector() && Subtarget.hasVLX())) &&
(MaskEltSizeInBits > 8 || Subtarget.hasBWI()) &&
isSequentialOrUndefInRange(Mask, 0, NumMaskElts, 0, 2)) {
if (Depth == 0 && Root.getOpcode() == ISD::TRUNCATE)
// Bail if this was already a truncation or PACK node.
// We sometimes fail to match PACK if we demand known undef elements.
if (Depth == 0 && (Root.getOpcode() == ISD::TRUNCATE ||
Root.getOpcode() == X86ISD::PACKSS ||
Root.getOpcode() == X86ISD::PACKUS))
return SDValue(); // Nothing to do!
ShuffleSrcVT = MVT::getIntegerVT(MaskEltSizeInBits * 2);
ShuffleSrcVT = MVT::getVectorVT(ShuffleSrcVT, NumMaskElts / 2);
Expand Down
44 changes: 44 additions & 0 deletions llvm/test/CodeGen/X86/vector-shuffle-combining-avx512bwvl.ll
Expand Up @@ -175,3 +175,47 @@ define <8 x i32> @PR46393(<8 x i16> %a0, i8 %a1) {
%sel = select <8 x i1> %mask, <8 x i32> %shl, <8 x i32> zeroinitializer
ret <8 x i32> %sel
}

; Regression test for issue #55050: a chain of PACKUS intrinsics and shuffles
; whose operands and shuffle masks contain undef elements. The CHECK lines
; below are the expected llc output for the 32-bit and 64-bit run lines; both
; expect the whole chain to fold away to a zero result.
define i64 @PR55050() {
; X86-LABEL: PR55050:
; X86: # %bb.0: # %entry
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: testb %al, %al
; X86-NEXT: jne .LBB10_2
; X86-NEXT: # %bb.1: # %if
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: .LBB10_2: # %exit
; X86-NEXT: movl %eax, %edx
; X86-NEXT: retl
;
; X64-LABEL: PR55050:
; X64: # %bb.0: # %entry
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: testb %al, %al
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: retq
entry:
; Two identical PSADBW calls, each with an undef first operand and an all-zero
; second operand.
%i275 = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> undef, <16 x i8> zeroinitializer)
%i277 = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> undef, <16 x i8> zeroinitializer)
; The branch condition is undef, so either successor may be taken.
br i1 undef, label %exit, label %if

if:
; Reinterpret both PSADBW results as <4 x i32> and gather the even i32 lanes:
; mask indices 0,2 select from %i298 and 4,6 select lanes 0,2 of %i299.
%i298 = bitcast <2 x i64> %i275 to <4 x i32>
%i299 = bitcast <2 x i64> %i277 to <4 x i32>
%i300 = shufflevector <4 x i32> %i298, <4 x i32> %i299, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
; First pack (i32 -> i16 lanes); the second operand is undef.
%i339 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %i300, <4 x i32> undef)
; Keep the even i16 lanes in place and leave every odd lane undef.
%i354 = shufflevector <8 x i16> %i339, <8 x i16> undef, <8 x i32> <i32 0, i32 undef, i32 2, i32 undef, i32 4, i32 undef, i32 6, i32 undef>
; Second pack (i16 -> i8 lanes); again the second operand is undef.
%i356 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %i354, <8 x i16> undef)
; Byte shuffle of the packed result; mask index 16 selects byte 0 of the
; zeroinitializer operand, i.e. a zero byte.
%i357 = shufflevector <16 x i8> %i356, <16 x i8> zeroinitializer, <16 x i32> <i32 6, i32 5, i32 4, i32 16, i32 2, i32 1, i32 0, i32 16, i32 10, i32 9, i32 8, i32 16, i32 16, i32 16, i32 16, i32 16>
; Only element 8 of the shuffle is used; its mask index is 10, i.e. byte 10 of
; %i356.
; NOTE(review): that byte presumably comes from the undef second operand of the
; packuswb call - confirm against the PACKUSWB lane layout.
%i361 = extractelement <16 x i8> %i357, i64 8
; Mask to the low 6 bits and widen to the i64 return type.
%i360 = and i8 %i361, 63
%i379 = zext i8 %i360 to i64
br label %exit

exit:
; Result is the computed value when coming from %if, or 0 when coming straight
; from %entry.
%res = phi i64 [ %i379, %if ], [ 0, %entry ]
ret i64 %res
}
declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>)
declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>)

0 comments on commit e8305c0

Please sign in to comment.