diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d5fdd233d82b0..90b3f79e81fb1 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -11231,6 +11231,7 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
   APInt ZeroMask = APInt::getZero(NumElems);
   APInt NonZeroMask = APInt::getZero(NumElems);
   bool IsAllConstants = true;
+  bool OneUseFrozenUndefs = true;
   SmallSet<SDValue, 8> Values;
   unsigned NumConstants = NumElems;
   for (unsigned i = 0; i < NumElems; ++i) {
@@ -11239,7 +11240,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
       UndefMask.setBit(i);
       continue;
     }
-    if (Elt.getOpcode() == ISD::FREEZE && Elt.getOperand(0).isUndef()) {
+    if (ISD::isFreezeUndef(Elt.getNode())) {
+      OneUseFrozenUndefs = OneUseFrozenUndefs && Elt->hasOneUse();
       FrozenUndefMask.setBit(i);
       continue;
     }
@@ -11259,10 +11261,14 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
   if (UndefMask.isAllOnes())
     return DAG.getUNDEF(VT);
 
-  // All undef/freeze(undef) vector. Return an FREEZE UNDEF.
-  if ((UndefMask | FrozenUndefMask).isAllOnes())
+  // All undef/freeze(undef) vector. Return a FREEZE UNDEF.
+  if (OneUseFrozenUndefs && (UndefMask | FrozenUndefMask).isAllOnes())
     return DAG.getFreeze(DAG.getUNDEF(VT));
 
+  // All undef/freeze(undef)/zero vector. Return a zero vector.
+  if ((UndefMask | FrozenUndefMask | ZeroMask).isAllOnes())
+    return getZeroVector(VT, Subtarget, DAG, dl);
+
   // If we have multiple FREEZE-UNDEF operands, we are likely going to end up
   // lowering into a suboptimal insertion sequence. Instead, thaw the UNDEF in
   // our source BUILD_VECTOR, create another FREEZE-UNDEF splat BUILD_VECTOR,
diff --git a/llvm/test/CodeGen/X86/build-vector-128.ll b/llvm/test/CodeGen/X86/build-vector-128.ll
index df664a92425a0..30f55a1d41922 100644
--- a/llvm/test/CodeGen/X86/build-vector-128.ll
+++ b/llvm/test/CodeGen/X86/build-vector-128.ll
@@ -556,9 +556,8 @@ define void @pr60168_buildvector_of_zeros_and_undef(<2 x i32> %x, ptr %out) {
 ; SSE2-32-LABEL: pr60168_buildvector_of_zeros_and_undef:
 ; SSE2-32:       # %bb.0:
 ; SSE2-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; SSE2-32-NEXT:    xorpd %xmm1, %xmm1
-; SSE2-32-NEXT:    movsd {{.*#+}} xmm1 = xmm1[0,1]
 ; SSE2-32-NEXT:    paddd %xmm0, %xmm0
+; SSE2-32-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-32-NEXT:    psubd %xmm0, %xmm1
 ; SSE2-32-NEXT:    movdqa %xmm1, %xmm0
 ; SSE2-32-NEXT:    psrad $31, %xmm0
@@ -569,9 +568,8 @@ define void @pr60168_buildvector_of_zeros_and_undef(<2 x i32> %x, ptr %out) {
 ;
 ; SSE2-64-LABEL: pr60168_buildvector_of_zeros_and_undef:
 ; SSE2-64:       # %bb.0:
-; SSE2-64-NEXT:    xorpd %xmm1, %xmm1
-; SSE2-64-NEXT:    movsd {{.*#+}} xmm1 = xmm1[0,1]
 ; SSE2-64-NEXT:    paddd %xmm0, %xmm0
+; SSE2-64-NEXT:    pxor %xmm1, %xmm1
 ; SSE2-64-NEXT:    psubd %xmm0, %xmm1
 ; SSE2-64-NEXT:    movdqa %xmm1, %xmm0
 ; SSE2-64-NEXT:    psrad $31, %xmm0
diff --git a/llvm/test/CodeGen/X86/freeze-vector.ll b/llvm/test/CodeGen/X86/freeze-vector.ll
index 5f060fb0d4d93..45587f1f33ece 100644
--- a/llvm/test/CodeGen/X86/freeze-vector.ll
+++ b/llvm/test/CodeGen/X86/freeze-vector.ll
@@ -355,7 +355,8 @@ define void @freeze_two_frozen_buildvectors(ptr %origin0, ptr %origin1, ptr %dst
 ; X86-NEXT:    vmovdqa %xmm0, (%ecx)
 ; X86-NEXT:    vmovd %edx, %xmm0
 ; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
-; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6,7]
+; X86-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm2[0,1,2,3],xmm0[4,5],xmm2[6,7]
 ; X86-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; X86-NEXT:    vmovdqa %xmm0, (%eax)
 ; X86-NEXT:    retl
@@ -370,7 +371,8 @@ define void @freeze_two_frozen_buildvectors(ptr %origin0, ptr %origin1, ptr %dst
 ; X64-NEXT:    vmovdqa %xmm0, (%rdx)
 ; X64-NEXT:    vmovd %eax, %xmm0
 ; X64-NEXT:    vpbroadcastd %xmm0, %xmm0
-; X64-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0,1,2,3]
+; X64-NEXT:    vpxor %xmm2, %xmm2, %xmm2
+; X64-NEXT:    vpblendd {{.*#+}} xmm0 = xmm2[0,1],xmm0[2],xmm2[3]
 ; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
 ; X64-NEXT:    vmovdqa %xmm0, (%rcx)
 ; X64-NEXT:    retq
@@ -397,13 +399,14 @@ define void @freeze_two_buildvectors_only_one_frozen(ptr %origin0, ptr %origin1,
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movl (%edx), %edx
 ; X86-NEXT:    andl $15, %edx
-; X86-NEXT:    vmovd %edx, %xmm0
-; X86-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[0,0,1,1]
-; X86-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3],xmm0[4,5,6,7]
+; X86-NEXT:    vpxor %xmm0, %xmm0, %xmm0
+; X86-NEXT:    vmovd %edx, %xmm1
+; X86-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[0,0,1,1]
+; X86-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5,6,7]
 ; X86-NEXT:    vmovdqa {{.*#+}} xmm2 = [7,7,7,7]
-; X86-NEXT:    vpand %xmm2, %xmm1, %xmm1
-; X86-NEXT:    vmovdqa %xmm1, (%ecx)
-; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; X86-NEXT:    vpand %xmm2, %xmm0, %xmm0
+; X86-NEXT:    vmovdqa %xmm0, (%ecx)
+; X86-NEXT:    vpshufd {{.*#+}} xmm0 = xmm1[0,1,0,1]
 ; X86-NEXT:    vpand %xmm2, %xmm0, %xmm0
 ; X86-NEXT:    vmovdqa %xmm0, (%eax)
 ; X86-NEXT:    retl
@@ -412,13 +415,14 @@ define void @freeze_two_buildvectors_only_one_frozen(ptr %origin0, ptr %origin1,
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl (%rdi), %eax
 ; X64-NEXT:    andl $15, %eax
-; X64-NEXT:    vmovd %eax, %xmm0
-; X64-NEXT:    vpbroadcastd %xmm0, %xmm0
-; X64-NEXT:    vpblendd {{.*#+}} xmm1 = xmm0[0,1,2,3]
+; X64-NEXT:    vpxor %xmm0, %xmm0, %xmm0
+; X64-NEXT:    vmovd %eax, %xmm1
+; X64-NEXT:    vpbroadcastd %xmm1, %xmm1
+; X64-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2,3]
 ; X64-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [7,7,7,7]
-; X64-NEXT:    vpand %xmm2, %xmm1, %xmm1
-; X64-NEXT:    vmovdqa %xmm1, (%rdx)
 ; X64-NEXT:    vpand %xmm2, %xmm0, %xmm0
+; X64-NEXT:    vmovdqa %xmm0, (%rdx)
+; X64-NEXT:    vpand %xmm2, %xmm1, %xmm0
 ; X64-NEXT:    vmovdqa %xmm0, (%rcx)
 ; X64-NEXT:    retq
   %i0.src = load i32, ptr %origin0
@@ -443,14 +447,13 @@ define void @freeze_two_buildvectors_one_undef_elt(ptr %origin0, ptr %origin1, p
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movl (%edx), %edx
 ; X86-NEXT:    andl $15, %edx
-; X86-NEXT:    vmovd %edx, %xmm0
-; X86-NEXT:    vpblendw {{.*#+}} xmm1 = xmm0[0,1,2,3,4,5,6,7]
-; X86-NEXT:    vmovddup {{.*#+}} xmm2 = [7,7]
-; X86-NEXT:    # xmm2 = mem[0,0]
-; X86-NEXT:    vpand %xmm2, %xmm1, %xmm1
-; X86-NEXT:    vmovdqa %xmm1, (%ecx)
-; X86-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7]
-; X86-NEXT:    vpand %xmm2, %xmm0, %xmm0
+; X86-NEXT:    vmovddup {{.*#+}} xmm0 = [7,7]
+; X86-NEXT:    # xmm0 = mem[0,0]
+; X86-NEXT:    vmovd %edx, %xmm1
+; X86-NEXT:    vpand %xmm0, %xmm1, %xmm2
+; X86-NEXT:    vmovdqa %xmm2, (%ecx)
+; X86-NEXT:    vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
+; X86-NEXT:    vpand %xmm0, %xmm1, %xmm0
 ; X86-NEXT:    vmovdqa %xmm0, (%eax)
 ; X86-NEXT:    retl
 ;