Skip to content

Commit

Permalink
[X86] LowerBUILD_VECTOR - fold build_vector(undef,freeze(undef),zero)…
Browse files Browse the repository at this point in the history
… -> zero vector

426db6b added the build_vector(undef,freeze(undef)) -> freeze(undef) fold, but failed to account for cases where the scalar freeze(undef) had multiple uses; in those cases we can only safely fold to a zero vector

https://alive2.llvm.org/ce/z/87jG8K
  • Loading branch information
RKSimon committed Apr 26, 2023
1 parent a93c423 commit ae0f41d
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 28 deletions.
12 changes: 9 additions & 3 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11231,6 +11231,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
APInt ZeroMask = APInt::getZero(NumElems);
APInt NonZeroMask = APInt::getZero(NumElems);
bool IsAllConstants = true;
bool OneUseFrozenUndefs = true;
SmallSet<SDValue, 8> Values;
unsigned NumConstants = NumElems;
for (unsigned i = 0; i < NumElems; ++i) {
Expand All @@ -11239,7 +11240,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
UndefMask.setBit(i);
continue;
}
if (Elt.getOpcode() == ISD::FREEZE && Elt.getOperand(0).isUndef()) {
if (ISD::isFreezeUndef(Elt.getNode())) {
OneUseFrozenUndefs = OneUseFrozenUndefs && Elt->hasOneUse();
FrozenUndefMask.setBit(i);
continue;
}
Expand All @@ -11259,10 +11261,14 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (UndefMask.isAllOnes())
return DAG.getUNDEF(VT);

// All undef/freeze(undef) vector. Return an FREEZE UNDEF.
if ((UndefMask | FrozenUndefMask).isAllOnes())
// All undef/freeze(undef) vector. Return a FREEZE UNDEF.
if (OneUseFrozenUndefs && (UndefMask | FrozenUndefMask).isAllOnes())
return DAG.getFreeze(DAG.getUNDEF(VT));

// All undef/freeze(undef)/zero vector. Return a zero vector.
if ((UndefMask | FrozenUndefMask | ZeroMask).isAllOnes())
return getZeroVector(VT, Subtarget, DAG, dl);

// If we have multiple FREEZE-UNDEF operands, we are likely going to end up
// lowering into a suboptimal insertion sequence. Instead, thaw the UNDEF in
// our source BUILD_VECTOR, create another FREEZE-UNDEF splat BUILD_VECTOR,
Expand Down
6 changes: 2 additions & 4 deletions llvm/test/CodeGen/X86/build-vector-128.ll
Original file line number Diff line number Diff line change
Expand Up @@ -556,9 +556,8 @@ define void @pr60168_buildvector_of_zeros_and_undef(<2 x i32> %x, ptr %out) {
; SSE2-32-LABEL: pr60168_buildvector_of_zeros_and_undef:
; SSE2-32: # %bb.0:
; SSE2-32-NEXT: movl {{[0-9]+}}(%esp), %eax
; SSE2-32-NEXT: xorpd %xmm1, %xmm1
; SSE2-32-NEXT: movsd {{.*#+}} xmm1 = xmm1[0,1]
; SSE2-32-NEXT: paddd %xmm0, %xmm0
; SSE2-32-NEXT: pxor %xmm1, %xmm1
; SSE2-32-NEXT: psubd %xmm0, %xmm1
; SSE2-32-NEXT: movdqa %xmm1, %xmm0
; SSE2-32-NEXT: psrad $31, %xmm0
Expand All @@ -569,9 +568,8 @@ define void @pr60168_buildvector_of_zeros_and_undef(<2 x i32> %x, ptr %out) {
;
; SSE2-64-LABEL: pr60168_buildvector_of_zeros_and_undef:
; SSE2-64: # %bb.0:
; SSE2-64-NEXT: xorpd %xmm1, %xmm1
; SSE2-64-NEXT: movsd {{.*#+}} xmm1 = xmm1[0,1]
; SSE2-64-NEXT: paddd %xmm0, %xmm0
; SSE2-64-NEXT: pxor %xmm1, %xmm1
; SSE2-64-NEXT: psubd %xmm0, %xmm1
; SSE2-64-NEXT: movdqa %xmm1, %xmm0
; SSE2-64-NEXT: psrad $31, %xmm0
Expand Down
45 changes: 24 additions & 21 deletions llvm/test/CodeGen/X86/freeze-vector.ll
Original file line number Diff line number Diff line change
Expand Up @@ -355,7 +355,8 @@ define void @freeze_two_frozen_buildvectors(ptr %origin0, ptr %origin1, ptr %dst
; X86-NEXT: vmovdqa %xmm0, (%ecx)
; X86-NEXT: vmovd %edx, %xmm0
; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; X86-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7]
; X86-NEXT: vpxor %xmm2, %xmm2, %xmm2
; X86-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5],xmm2[6,7]
; X86-NEXT: vpand %xmm1, %xmm0, %xmm0
; X86-NEXT: vmovdqa %xmm0, (%eax)
; X86-NEXT: retl
Expand All @@ -370,7 +371,8 @@ define void @freeze_two_frozen_buildvectors(ptr %origin0, ptr %origin1, ptr %dst
; X64-NEXT: vmovdqa %xmm0, (%rdx)
; X64-NEXT: vmovd %eax, %xmm0
; X64-NEXT: vpbroadcastd %xmm0, %xmm0
; X64-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1,2,3]
; X64-NEXT: vpxor %xmm2, %xmm2, %xmm2
; X64-NEXT: vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm0[2],xmm2[3]
; X64-NEXT: vpand %xmm1, %xmm0, %xmm0
; X64-NEXT: vmovdqa %xmm0, (%rcx)
; X64-NEXT: retq
Expand All @@ -397,13 +399,14 @@ define void @freeze_two_buildvectors_only_one_frozen(ptr %origin0, ptr %origin1,
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl (%edx), %edx
; X86-NEXT: andl $15, %edx
; X86-NEXT: vmovd %edx, %xmm0
; X86-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[0,0,1,1]
; X86-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
; X86-NEXT: vpxor %xmm0, %xmm0, %xmm0
; X86-NEXT: vmovd %edx, %xmm1
; X86-NEXT: vpshufd {{.*#+}} xmm2 = xmm1[0,0,1,1]
; X86-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5,6,7]
; X86-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7]
; X86-NEXT: vpand %xmm2, %xmm1, %xmm1
; X86-NEXT: vmovdqa %xmm1, (%ecx)
; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; X86-NEXT: vpand %xmm2, %xmm0, %xmm0
; X86-NEXT: vmovdqa %xmm0, (%ecx)
; X86-NEXT: vpshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
; X86-NEXT: vpand %xmm2, %xmm0, %xmm0
; X86-NEXT: vmovdqa %xmm0, (%eax)
; X86-NEXT: retl
Expand All @@ -412,13 +415,14 @@ define void @freeze_two_buildvectors_only_one_frozen(ptr %origin0, ptr %origin1,
; X64: # %bb.0:
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: andl $15, %eax
; X64-NEXT: vmovd %eax, %xmm0
; X64-NEXT: vpbroadcastd %xmm0, %xmm0
; X64-NEXT: vpblendd {{.*#+}} xmm1 = xmm0[0,1,2,3]
; X64-NEXT: vpxor %xmm0, %xmm0, %xmm0
; X64-NEXT: vmovd %eax, %xmm1
; X64-NEXT: vpbroadcastd %xmm1, %xmm1
; X64-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
; X64-NEXT: vpbroadcastd {{.*#+}} xmm2 = [7,7,7,7]
; X64-NEXT: vpand %xmm2, %xmm1, %xmm1
; X64-NEXT: vmovdqa %xmm1, (%rdx)
; X64-NEXT: vpand %xmm2, %xmm0, %xmm0
; X64-NEXT: vmovdqa %xmm0, (%rdx)
; X64-NEXT: vpand %xmm2, %xmm1, %xmm0
; X64-NEXT: vmovdqa %xmm0, (%rcx)
; X64-NEXT: retq
%i0.src = load i32, ptr %origin0
Expand All @@ -443,14 +447,13 @@ define void @freeze_two_buildvectors_one_undef_elt(ptr %origin0, ptr %origin1, p
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl (%edx), %edx
; X86-NEXT: andl $15, %edx
; X86-NEXT: vmovd %edx, %xmm0
; X86-NEXT: vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,5,6,7]
; X86-NEXT: vmovddup {{.*#+}} xmm2 = [7,7]
; X86-NEXT: # xmm2 = mem[0,0]
; X86-NEXT: vpand %xmm2, %xmm1, %xmm1
; X86-NEXT: vmovdqa %xmm1, (%ecx)
; X86-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
; X86-NEXT: vpand %xmm2, %xmm0, %xmm0
; X86-NEXT: vmovddup {{.*#+}} xmm0 = [7,7]
; X86-NEXT: # xmm0 = mem[0,0]
; X86-NEXT: vmovd %edx, %xmm1
; X86-NEXT: vpand %xmm0, %xmm1, %xmm2
; X86-NEXT: vmovdqa %xmm2, (%ecx)
; X86-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
; X86-NEXT: vpand %xmm0, %xmm1, %xmm0
; X86-NEXT: vmovdqa %xmm0, (%eax)
; X86-NEXT: retl
;
Expand Down

0 comments on commit ae0f41d

Please sign in to comment.