diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index f984d4b395964..062a9743b8cc0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7620,6 +7620,7 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, const SDLoc &DL) {
 static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
                                   SDNode *N) {
   EVT VT = N0.getValueType();
+  unsigned BW = VT.getScalarSizeInBits();
   SDLoc DL(N);
 
   auto peekThroughResize = [](SDValue V) {
@@ -7689,6 +7690,26 @@ static SDValue visitORCommutative(SelectionDAG &DAG, SDValue N0, SDValue N1,
       peekThroughZext(N0.getOperand(2)) == peekThroughZext(N1.getOperand(1)))
     return N0;
 
+  // Attempt to match a legalized build_pair-esque pattern:
+  // or(shl(aext(Hi),BW/2),zext(Lo))
+  SDValue Lo, Hi;
+  if (sd_match(N0,
+               m_OneUse(m_Shl(m_AnyExt(m_Value(Hi)), m_SpecificInt(BW / 2)))) &&
+      sd_match(N1, m_ZExt(m_Value(Lo))) &&
+      Lo.getScalarValueSizeInBits() == (BW / 2) &&
+      Lo.getValueType() == Hi.getValueType()) {
+    // Fold build_pair(not(Lo),not(Hi)) -> not(build_pair(Lo,Hi)).
+    SDValue NotLo, NotHi;
+    if (sd_match(Lo, m_OneUse(m_Not(m_Value(NotLo)))) &&
+        sd_match(Hi, m_OneUse(m_Not(m_Value(NotHi))))) {
+      Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotLo);
+      Hi = DAG.getNode(ISD::ANY_EXTEND, DL, VT, NotHi);
+      Hi = DAG.getNode(ISD::SHL, DL, VT, Hi,
+                       DAG.getShiftAmountConstant(BW / 2, VT, DL));
+      return DAG.getNOT(DL, DAG.getNode(ISD::OR, DL, VT, Lo, Hi), VT);
+    }
+  }
+
   return SDValue();
 }
 
diff --git a/llvm/test/CodeGen/X86/combine-or.ll b/llvm/test/CodeGen/X86/combine-or.ll
index 8419001de236c..3b2102f46a297 100644
--- a/llvm/test/CodeGen/X86/combine-or.ll
+++ b/llvm/test/CodeGen/X86/combine-or.ll
@@ -186,11 +186,10 @@ define i64 @or_build_pair_not(i32 %a0, i32 %a1) {
 ; CHECK-LABEL: or_build_pair_not:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    # kill: def $esi killed $esi def $rsi
-; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
-; CHECK-NEXT:    notl %edi
-; CHECK-NEXT:    notl %esi
 ; CHECK-NEXT:    shlq $32, %rsi
-; CHECK-NEXT:    leaq (%rsi,%rdi), %rax
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    orq %rsi, %rax
+; CHECK-NEXT:    notq %rax
 ; CHECK-NEXT:    retq
   %n0 = xor i32 %a0, -1
   %n1 = xor i32 %a1, -1
@@ -269,12 +268,11 @@ define i64 @PR89533(<64 x i8> %a0) {
 ; AVX2-NEXT:    vpbroadcastb {{.*#+}} ymm2 = [95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95,95]
 ; AVX2-NEXT:    vpcmpeqb %ymm2, %ymm0, %ymm0
 ; AVX2-NEXT:    vpmovmskb %ymm0, %eax
-; AVX2-NEXT:    notl %eax
 ; AVX2-NEXT:    vpcmpeqb %ymm2, %ymm1, %ymm0
 ; AVX2-NEXT:    vpmovmskb %ymm0, %ecx
-; AVX2-NEXT:    notl %ecx
 ; AVX2-NEXT:    shlq $32, %rcx
 ; AVX2-NEXT:    orq %rax, %rcx
+; AVX2-NEXT:    notq %rcx
 ; AVX2-NEXT:    xorl %eax, %eax
 ; AVX2-NEXT:    tzcntq %rcx, %rax
 ; AVX2-NEXT:    vzeroupper
diff --git a/llvm/test/CodeGen/X86/subvectorwise-store-of-vector-splat.ll b/llvm/test/CodeGen/X86/subvectorwise-store-of-vector-splat.ll
index e0f438eb7cc8f..ae66c5420638b 100644
--- a/llvm/test/CodeGen/X86/subvectorwise-store-of-vector-splat.ll
+++ b/llvm/test/CodeGen/X86/subvectorwise-store-of-vector-splat.ll
@@ -3060,12 +3060,7 @@ define void @vec384_v3i32(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.
 ; SCALAR:       # %bb.0:
 ; SCALAR-NEXT:    movl 8(%rdi), %eax
 ; SCALAR-NEXT:    movq (%rdi), %rcx
-; SCALAR-NEXT:    movq %rcx, %rdi
-; SCALAR-NEXT:    shrq $32, %rdi
-; SCALAR-NEXT:    notl %edi
-; SCALAR-NEXT:    shlq $32, %rdi
-; SCALAR-NEXT:    notl %ecx
-; SCALAR-NEXT:    orq %rdi, %rcx
+; SCALAR-NEXT:    notq %rcx
 ; SCALAR-NEXT:    notl %eax
 ; SCALAR-NEXT:    movl %eax, 8(%rsi)
 ; SCALAR-NEXT:    movq %rcx, (%rsi)
@@ -3196,12 +3191,7 @@ define void @vec384_v3f32(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.
 ; SCALAR:       # %bb.0:
 ; SCALAR-NEXT:    movl 8(%rdi), %eax
 ; SCALAR-NEXT:    movq (%rdi), %rcx
-; SCALAR-NEXT:    movq %rcx, %rdi
-; SCALAR-NEXT:    shrq $32, %rdi
-; SCALAR-NEXT:    notl %edi
-; SCALAR-NEXT:    shlq $32, %rdi
-; SCALAR-NEXT:    notl %ecx
-; SCALAR-NEXT:    orq %rdi, %rcx
+; SCALAR-NEXT:    notq %rcx
 ; SCALAR-NEXT:    notl %eax
 ; SCALAR-NEXT:    movl %eax, 8(%rsi)
 ; SCALAR-NEXT:    movq %rcx, (%rsi)
@@ -4216,25 +4206,10 @@ define void @vec384_v6i32(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.
 ; SCALAR:       # %bb.0:
 ; SCALAR-NEXT:    movq (%rdi), %rax
 ; SCALAR-NEXT:    movq 8(%rdi), %rcx
-; SCALAR-NEXT:    movq %rax, %r8
-; SCALAR-NEXT:    shrq $32, %r8
-; SCALAR-NEXT:    movq %rcx, %r9
-; SCALAR-NEXT:    shrq $32, %r9
 ; SCALAR-NEXT:    movq 16(%rdi), %rdi
-; SCALAR-NEXT:    movq %rdi, %r10
-; SCALAR-NEXT:    shrq $32, %r10
-; SCALAR-NEXT:    notl %r10d
-; SCALAR-NEXT:    shlq $32, %r10
-; SCALAR-NEXT:    notl %edi
-; SCALAR-NEXT:    orq %r10, %rdi
-; SCALAR-NEXT:    notl %r9d
-; SCALAR-NEXT:    shlq $32, %r9
-; SCALAR-NEXT:    notl %ecx
-; SCALAR-NEXT:    orq %r9, %rcx
-; SCALAR-NEXT:    notl %r8d
-; SCALAR-NEXT:    shlq $32, %r8
-; SCALAR-NEXT:    notl %eax
-; SCALAR-NEXT:    orq %r8, %rax
+; SCALAR-NEXT:    notq %rdi
+; SCALAR-NEXT:    notq %rcx
+; SCALAR-NEXT:    notq %rax
 ; SCALAR-NEXT:    movq %rax, (%rsi)
 ; SCALAR-NEXT:    movq %rcx, 8(%rsi)
 ; SCALAR-NEXT:    movq %rdi, 16(%rsi)
@@ -4303,25 +4278,10 @@ define void @vec384_v6f32(ptr %in.subvec.ptr, ptr %out.subvec.ptr, ptr %out.vec.
 ; SCALAR:       # %bb.0:
 ; SCALAR-NEXT:    movq (%rdi), %rax
 ; SCALAR-NEXT:    movq 8(%rdi), %rcx
-; SCALAR-NEXT:    movq %rax, %r8
-; SCALAR-NEXT:    shrq $32, %r8
-; SCALAR-NEXT:    movq %rcx, %r9
-; SCALAR-NEXT:    shrq $32, %r9
 ; SCALAR-NEXT:    movq 16(%rdi), %rdi
-; SCALAR-NEXT:    movq %rdi, %r10
-; SCALAR-NEXT:    shrq $32, %r10
-; SCALAR-NEXT:    notl %r10d
-; SCALAR-NEXT:    shlq $32, %r10
-; SCALAR-NEXT:    notl %edi
-; SCALAR-NEXT:    orq %r10, %rdi
-; SCALAR-NEXT:    notl %r9d
-; SCALAR-NEXT:    shlq $32, %r9
-; SCALAR-NEXT:    notl %ecx
-; SCALAR-NEXT:    orq %r9, %rcx
-; SCALAR-NEXT:    notl %r8d
-; SCALAR-NEXT:    shlq $32, %r8
-; SCALAR-NEXT:    notl %eax
-; SCALAR-NEXT:    orq %r8, %rax
+; SCALAR-NEXT:    notq %rdi
+; SCALAR-NEXT:    notq %rcx
+; SCALAR-NEXT:    notq %rax
 ; SCALAR-NEXT:    movq %rax, (%rsi)
 ; SCALAR-NEXT:    movq %rcx, 8(%rsi)
 ; SCALAR-NEXT:    movq %rdi, 16(%rsi)
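Note (illustrative sketch, not part of the patch): the fold relies on the identity or(shl(aext(not(Hi)), BW/2), zext(not(Lo))) == not(or(shl(aext(Hi), BW/2), zext(Lo))), i.e. inverting both halves before concatenating them is the same as inverting the concatenated value, which is why the tests replace paired notl+shlq/orq sequences with a single notq. A minimal standalone C++ check of that identity for the i64 = (i32 Hi, i32 Lo) case exercised by the tests is below; the helper names are hypothetical and chosen only for this sketch.

// Sanity check (illustrative only) of the identity behind the new fold,
// specialized to BW = 64: concat(~Hi, ~Lo) == ~concat(Hi, Lo).
#include <cassert>
#include <cstdint>

// "build_pair of nots": invert each 32-bit half, then concatenate.
static uint64_t buildPairOfNots(uint32_t Lo, uint32_t Hi) {
  uint32_t NotLo = ~Lo, NotHi = ~Hi;
  return (static_cast<uint64_t>(NotHi) << 32) | NotLo;
}

// "not of build_pair": concatenate first, then a single 64-bit NOT.
static uint64_t notOfBuildPair(uint32_t Lo, uint32_t Hi) {
  return ~((static_cast<uint64_t>(Hi) << 32) | static_cast<uint64_t>(Lo));
}

int main() {
  const uint32_t Samples[] = {0u,          1u,          0x7FFFFFFFu,
                              0x80000000u, 0xDEADBEEFu, 0xFFFFFFFFu};
  for (uint32_t Lo : Samples)
    for (uint32_t Hi : Samples)
      assert(buildPairOfNots(Lo, Hi) == notOfBuildPair(Lo, Hi));
  return 0;
}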