Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 21 additions & 4 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57617,10 +57617,10 @@ static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
}

// Fold any similar generic ADD/SUB opcodes to reuse this node.
auto MatchGeneric = [&](SDValue N0, SDValue N1, bool Negate) {
auto MatchGeneric = [&](unsigned Opc, SDValue N0, SDValue N1, bool Negate) {
SDValue Ops[] = {N0, N1};
SDVTList VTs = DAG.getVTList(N->getValueType(0));
if (SDNode *GenericAddSub = DAG.getNodeIfExists(GenericOpc, VTs, Ops)) {
if (SDNode *GenericAddSub = DAG.getNodeIfExists(Opc, VTs, Ops)) {
SDValue Op(N, 0);
if (Negate) {
// Bail if this is only used by a user of the x86 add/sub.
Expand All @@ -57632,8 +57632,25 @@ static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
DCI.CombineTo(GenericAddSub, Op);
}
};
MatchGeneric(LHS, RHS, false);
MatchGeneric(RHS, LHS, X86ISD::SUB == N->getOpcode());
MatchGeneric(GenericOpc, LHS, RHS, false);
MatchGeneric(GenericOpc, RHS, LHS, X86ISD::SUB == N->getOpcode());

if (auto *Const = dyn_cast<ConstantSDNode>(RHS)) {
SDValue NegC = DAG.getConstant(-Const->getAPIntValue(), DL, VT);
if (X86ISD::SUB == N->getOpcode()) {
// Fold generic add(LHS, -C) to X86ISD::SUB(LHS, C).
MatchGeneric(ISD::ADD, LHS, NegC, false);
} else {
// Negate X86ISD::ADD(LHS, C) and replace generic sub(-C, LHS).
MatchGeneric(ISD::SUB, NegC, LHS, true);
}
} else if (auto *Const = dyn_cast<ConstantSDNode>(LHS)) {
if (X86ISD::SUB == N->getOpcode()) {
SDValue NegC = DAG.getConstant(-Const->getAPIntValue(), DL, VT);
// Negate X86ISD::SUB(C, RHS) and replace generic add(RHS, -C).
MatchGeneric(ISD::ADD, RHS, NegC, true);
}
}

// TODO: Can we drop the ZeroSecondOpOnly limit? This is to guarantee that the
// EFLAGS result doesn't change.
Expand Down
48 changes: 48 additions & 0 deletions llvm/test/CodeGen/X86/combine-adc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -89,4 +89,52 @@ define i32 @adc_merge_constants(i32 %a0) nounwind {
ret i32 %sum
}

; Test: a generic `sub i32 -42, %a0` that coexists with an addcarry of
; (%a0, 42). Since -42 - %a0 == -(%a0 + 42), the checks below expect a single
; `addl $42` whose result is copied and negated (`negl`) rather than a separate
; subtraction being emitted.
define i32 @adc_merge_sub(i32 %a0) nounwind {
; X86-LABEL: adc_merge_sub:
; X86: # %bb.0:
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: addl $42, %edi
; X86-NEXT: setb %al
; X86-NEXT: movl %edi, %esi
; X86-NEXT: negl %esi
; X86-NEXT: pushl %eax
; X86-NEXT: calll use@PLT
; X86-NEXT: addl $4, %esp
; X86-NEXT: xorl %edi, %esi
; X86-NEXT: movl %esi, %eax
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl
;
; X64-LABEL: adc_merge_sub:
; X64: # %bb.0:
; X64-NEXT: pushq %rbp
; X64-NEXT: pushq %rbx
; X64-NEXT: pushq %rax
; X64-NEXT: movl %edi, %ebx
; X64-NEXT: xorl %edi, %edi
; X64-NEXT: addl $42, %ebx
; X64-NEXT: setb %dil
; X64-NEXT: movl %ebx, %ebp
; X64-NEXT: negl %ebp
; X64-NEXT: callq use@PLT
; X64-NEXT: xorl %ebx, %ebp
; X64-NEXT: movl %ebp, %eax
; X64-NEXT: addq $8, %rsp
; X64-NEXT: popq %rbx
; X64-NEXT: popq %rbp
; X64-NEXT: retq
%adc = tail call { i8, i32 } @llvm.x86.addcarry.32(i8 0, i32 %a0, i32 42)
%carry = extractvalue { i8, i32 } %adc, 0
; The carry-out is passed to @use, so it has a user besides the sum.
call void @use(i8 %carry)
%sum = extractvalue { i8, i32 } %adc, 1
; Generic subtraction with the negated constant on the left-hand side.
%sub = sub i32 -42, %a0
%result = xor i32 %sum, %sub
ret i32 %result
}

declare { i8, i32 } @llvm.x86.addcarry.32(i8, i32, i32)
declare void @use(i8)
81 changes: 81 additions & 0 deletions llvm/test/CodeGen/X86/combine-sbb.ll
Original file line number Diff line number Diff line change
Expand Up @@ -333,4 +333,85 @@ define i32 @PR40483_sub6(ptr, i32) nounwind {
ret i32 %10
}

; Test: a generic `add i32 %a0, -42` that coexists with a subborrow of
; (%a0, 42). Since %a0 + (-42) == %a0 - 42, both xor operands are the same
; value; the checks below expect the returned result to be zeroed by the final
; `xorl %eax, %eax`, with only a `cmpl $42` left to produce the borrow.
define i32 @sbb_merge_add1(i32 %a0) nounwind {
; X86-LABEL: sbb_merge_add1:
; X86: # %bb.0:
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl $42, {{[0-9]+}}(%esp)
; X86-NEXT: setb %al
; X86-NEXT: pushl %eax
; X86-NEXT: calll use@PLT
; X86-NEXT: addl $4, %esp
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: sbb_merge_add1:
; X64: # %bb.0:
; X64-NEXT: pushq %rax
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: cmpl $42, %edi
; X64-NEXT: setb %al
; X64-NEXT: movl %eax, %edi
; X64-NEXT: callq use@PLT
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: popq %rcx
; X64-NEXT: retq
%sbb = tail call { i8, i32 } @llvm.x86.subborrow.32(i8 0, i32 %a0, i32 42)
%borrow = extractvalue { i8, i32 } %sbb, 0
; The borrow-out is passed to @use, so it has a user besides the difference.
call void @use(i8 %borrow)
%diff = extractvalue { i8, i32 } %sbb, 1
; Generic addition of the negated constant.
%add = add i32 %a0, -42
%result = xor i32 %diff, %add
ret i32 %result
}

; Test: a generic `add i32 %a0, -42` that coexists with a subborrow whose
; constant is the left-hand operand (42, %a0). Since %a0 + (-42) == -(42 - %a0),
; the checks below expect a single `subl` from 42 whose result is copied and
; negated (`negl`) rather than a separate addition being emitted.
define i32 @sbb_merge_add2(i32 %a0) nounwind {
; X86-LABEL: sbb_merge_add2:
; X86: # %bb.0:
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl $42, %edi
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
; X86-NEXT: setb %al
; X86-NEXT: movl %edi, %esi
; X86-NEXT: negl %esi
; X86-NEXT: pushl %eax
; X86-NEXT: calll use@PLT
; X86-NEXT: addl $4, %esp
; X86-NEXT: xorl %edi, %esi
; X86-NEXT: movl %esi, %eax
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl
;
; X64-LABEL: sbb_merge_add2:
; X64: # %bb.0:
; X64-NEXT: pushq %rbp
; X64-NEXT: pushq %rbx
; X64-NEXT: pushq %rax
; X64-NEXT: movl $42, %ebp
; X64-NEXT: xorl %eax, %eax
; X64-NEXT: subl %edi, %ebp
; X64-NEXT: setb %al
; X64-NEXT: movl %ebp, %ebx
; X64-NEXT: negl %ebx
; X64-NEXT: movl %eax, %edi
; X64-NEXT: callq use@PLT
; X64-NEXT: xorl %ebp, %ebx
; X64-NEXT: movl %ebx, %eax
; X64-NEXT: addq $8, %rsp
; X64-NEXT: popq %rbx
; X64-NEXT: popq %rbp
; X64-NEXT: retq
%sbb = tail call { i8, i32 } @llvm.x86.subborrow.32(i8 0, i32 42, i32 %a0)
%borrow = extractvalue { i8, i32 } %sbb, 0
; The borrow-out is passed to @use, so it has a user besides the difference.
call void @use(i8 %borrow)
%diff = extractvalue { i8, i32 } %sbb, 1
; Generic addition of the negated constant; operands are swapped relative to
; the subborrow above.
%add = add i32 %a0, -42
%result = xor i32 %diff, %add
ret i32 %result
}

declare { i8, i32 } @llvm.x86.subborrow.32(i8, i32, i32)
declare void @use(i8)
129 changes: 66 additions & 63 deletions llvm/test/CodeGen/X86/dag-update-nodetomatch.ll
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,17 @@ entry:
define void @_Z2x6v() local_unnamed_addr {
; CHECK-LABEL: _Z2x6v:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: movq x1@GOTPCREL(%rip), %rax
; CHECK-NEXT: movl (%rax), %edx
; CHECK-NEXT: andl $511, %edx # imm = 0x1FF
; CHECK-NEXT: leaq 1(%rdx), %rax
; CHECK-NEXT: movq x4@GOTPCREL(%rip), %rcx
; CHECK-NEXT: movl %eax, (%rcx)
; CHECK-NEXT: movq x3@GOTPCREL(%rip), %rcx
; CHECK-NEXT: movl (%rcx), %ecx
; CHECK-NEXT: testl %ecx, %ecx
; CHECK-NEXT: je .LBB1_18
; CHECK-NEXT: # %bb.1: # %for.cond1thread-pre-split.lr.ph
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
Expand All @@ -114,58 +125,47 @@ define void @_Z2x6v() local_unnamed_addr {
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq x1@GOTPCREL(%rip), %rax
; CHECK-NEXT: movl (%rax), %ebx
; CHECK-NEXT: andl $511, %ebx # imm = 0x1FF
; CHECK-NEXT: leaq 1(%rbx), %rax
; CHECK-NEXT: movq x4@GOTPCREL(%rip), %rcx
; CHECK-NEXT: movl %eax, (%rcx)
; CHECK-NEXT: movq x3@GOTPCREL(%rip), %rcx
; CHECK-NEXT: movl (%rcx), %ecx
; CHECK-NEXT: testl %ecx, %ecx
; CHECK-NEXT: je .LBB1_18
; CHECK-NEXT: # %bb.1: # %for.cond1thread-pre-split.lr.ph
; CHECK-NEXT: movq x5@GOTPCREL(%rip), %rdx
; CHECK-NEXT: movq (%rdx), %rsi
; CHECK-NEXT: movl %ecx, %edx
; CHECK-NEXT: notl %edx
; CHECK-NEXT: leaq 8(,%rdx,8), %rdi
; CHECK-NEXT: movq x5@GOTPCREL(%rip), %rsi
; CHECK-NEXT: movq (%rsi), %rsi
; CHECK-NEXT: movl %ecx, %edi
; CHECK-NEXT: notl %edi
; CHECK-NEXT: leaq 8(,%rdi,8), %rdi
; CHECK-NEXT: imulq %rax, %rdi
; CHECK-NEXT: addq %rsi, %rdi
; CHECK-NEXT: movq x2@GOTPCREL(%rip), %r8
; CHECK-NEXT: movl (%r8), %edx
; CHECK-NEXT: leal 8(,%rbx,8), %eax
; CHECK-NEXT: movl (%r8), %r9d
; CHECK-NEXT: leal 8(,%rdx,8), %eax
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT: leaq 32(%rsi), %r11
; CHECK-NEXT: leaq 8(,%rbx,8), %rbx
; CHECK-NEXT: xorl %r14d, %r14d
; CHECK-NEXT: movq x0@GOTPCREL(%rip), %r15
; CHECK-NEXT: movq %rsi, %r12
; CHECK-NEXT: leaq 32(%rsi), %rbx
; CHECK-NEXT: leaq 8(,%rdx,8), %r14
; CHECK-NEXT: xorl %r15d, %r15d
; CHECK-NEXT: movq x0@GOTPCREL(%rip), %r12
; CHECK-NEXT: movq %rsi, %r13
; CHECK-NEXT: jmp .LBB1_2
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB1_15: # %for.cond1.for.inc3_crit_edge
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: movl %edx, (%r8)
; CHECK-NEXT: movl %r9d, (%r8)
; CHECK-NEXT: .LBB1_16: # %for.inc3
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: addq %rbx, %r12
; CHECK-NEXT: incq %r14
; CHECK-NEXT: addq %rbx, %r11
; CHECK-NEXT: addq %r14, %r13
; CHECK-NEXT: incq %r15
; CHECK-NEXT: addq %r14, %rbx
; CHECK-NEXT: incl %ecx
; CHECK-NEXT: je .LBB1_17
; CHECK-NEXT: .LBB1_2: # %for.cond1thread-pre-split
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB1_12 Depth 2
; CHECK-NEXT: # Child Loop BB1_14 Depth 2
; CHECK-NEXT: testl %edx, %edx
; CHECK-NEXT: testl %r9d, %r9d
; CHECK-NEXT: jns .LBB1_16
; CHECK-NEXT: # %bb.3: # %for.body2.preheader
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: movslq %edx, %r13
; CHECK-NEXT: testq %r13, %r13
; CHECK-NEXT: movslq %r9d, %r9
; CHECK-NEXT: testq %r9, %r9
; CHECK-NEXT: movq $-1, %rbp
; CHECK-NEXT: cmovnsq %r13, %rbp
; CHECK-NEXT: subq %r13, %rbp
; CHECK-NEXT: cmovnsq %r9, %rbp
; CHECK-NEXT: subq %r9, %rbp
; CHECK-NEXT: incq %rbp
; CHECK-NEXT: cmpq $4, %rbp
; CHECK-NEXT: jb .LBB1_14
Expand All @@ -177,20 +177,20 @@ define void @_Z2x6v() local_unnamed_addr {
; CHECK-NEXT: # %bb.5: # %vector.memcheck
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; CHECK-NEXT: imulq %r14, %rax
; CHECK-NEXT: leaq (%rsi,%rax), %r10
; CHECK-NEXT: leaq (%r10,%r13,8), %r9
; CHECK-NEXT: testq %r13, %r13
; CHECK-NEXT: movq $-1, %r10
; CHECK-NEXT: cmovnsq %r13, %r10
; CHECK-NEXT: cmpq %r15, %r9
; CHECK-NEXT: imulq %r15, %rax
; CHECK-NEXT: leaq (%rsi,%rax), %r11
; CHECK-NEXT: leaq (%r11,%r9,8), %r10
; CHECK-NEXT: testq %r9, %r9
; CHECK-NEXT: movq $-1, %r11
; CHECK-NEXT: cmovnsq %r9, %r11
; CHECK-NEXT: cmpq %r12, %r10
; CHECK-NEXT: jae .LBB1_7
; CHECK-NEXT: # %bb.6: # %vector.memcheck
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: leaq 8(%rsi), %r9
; CHECK-NEXT: addq %r9, %rax
; CHECK-NEXT: leaq (%rax,%r10,8), %rax
; CHECK-NEXT: cmpq %r15, %rax
; CHECK-NEXT: leaq 8(%rsi), %r10
; CHECK-NEXT: addq %r10, %rax
; CHECK-NEXT: leaq (%rax,%r11,8), %rax
; CHECK-NEXT: cmpq %r12, %rax
; CHECK-NEXT: ja .LBB1_14
; CHECK-NEXT: .LBB1_7: # %vector.body.preheader
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
Expand All @@ -201,58 +201,54 @@ define void @_Z2x6v() local_unnamed_addr {
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; CHECK-NEXT: movdqu %xmm0, (%r12,%r13,8)
; CHECK-NEXT: movdqu %xmm0, 16(%r12,%r13,8)
; CHECK-NEXT: movl $4, %r10d
; CHECK-NEXT: movdqu %xmm0, (%r13,%r9,8)
; CHECK-NEXT: movdqu %xmm0, 16(%r13,%r9,8)
; CHECK-NEXT: movl $4, %r11d
; CHECK-NEXT: shrq $2, %rax
; CHECK-NEXT: jne .LBB1_11
; CHECK-NEXT: jmp .LBB1_13
; CHECK-NEXT: .LBB1_8: # in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: xorl %r10d, %r10d
; CHECK-NEXT: xorl %r11d, %r11d
; CHECK-NEXT: shrq $2, %rax
; CHECK-NEXT: je .LBB1_13
; CHECK-NEXT: .LBB1_11: # %vector.body.preheader.new
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; CHECK-NEXT: movq %r10, %rax
; CHECK-NEXT: movq %r11, %rax
; CHECK-NEXT: subq %rdx, %rax
; CHECK-NEXT: addq %r13, %r10
; CHECK-NEXT: leaq (%r11,%r10,8), %r10
; CHECK-NEXT: addq %r9, %r11
; CHECK-NEXT: leaq (%rbx,%r11,8), %r11
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB1_12: # %vector.body
; CHECK-NEXT: # Parent Loop BB1_2 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: movdqu %xmm0, -32(%r10)
; CHECK-NEXT: movdqu %xmm0, -16(%r10)
; CHECK-NEXT: movdqu %xmm0, (%r10)
; CHECK-NEXT: movdqu %xmm0, 16(%r10)
; CHECK-NEXT: addq $64, %r10
; CHECK-NEXT: movdqu %xmm0, -32(%r11)
; CHECK-NEXT: movdqu %xmm0, -16(%r11)
; CHECK-NEXT: movdqu %xmm0, (%r11)
; CHECK-NEXT: movdqu %xmm0, 16(%r11)
; CHECK-NEXT: addq $64, %r11
; CHECK-NEXT: addq $8, %rax
; CHECK-NEXT: jne .LBB1_12
; CHECK-NEXT: .LBB1_13: # %middle.block
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: addq %rdx, %r13
; CHECK-NEXT: addq %rdx, %r9
; CHECK-NEXT: cmpq %rdx, %rbp
; CHECK-NEXT: movq %r13, %rdx
; CHECK-NEXT: je .LBB1_15
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB1_14: # %for.body2
; CHECK-NEXT: # Parent Loop BB1_2 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: movq (%r15), %rax
; CHECK-NEXT: movq %rax, (%r12,%r13,8)
; CHECK-NEXT: leaq 1(%r13), %rdx
; CHECK-NEXT: cmpq $-1, %r13
; CHECK-NEXT: movq %rdx, %r13
; CHECK-NEXT: movq (%r12), %rax
; CHECK-NEXT: movq %rax, (%r13,%r9,8)
; CHECK-NEXT: incq %r9
; CHECK-NEXT: jl .LBB1_14
; CHECK-NEXT: jmp .LBB1_15
; CHECK-NEXT: .LBB1_17: # %for.cond.for.end5_crit_edge
; CHECK-NEXT: movq x5@GOTPCREL(%rip), %rax
; CHECK-NEXT: movq %rdi, (%rax)
; CHECK-NEXT: movq x3@GOTPCREL(%rip), %rax
; CHECK-NEXT: movl $0, (%rax)
; CHECK-NEXT: .LBB1_18: # %for.end5
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
Expand All @@ -265,6 +261,13 @@ define void @_Z2x6v() local_unnamed_addr {
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: .cfi_restore %rbx
; CHECK-NEXT: .cfi_restore %r12
; CHECK-NEXT: .cfi_restore %r13
; CHECK-NEXT: .cfi_restore %r14
; CHECK-NEXT: .cfi_restore %r15
; CHECK-NEXT: .cfi_restore %rbp
; CHECK-NEXT: .LBB1_18: # %for.end5
; CHECK-NEXT: retq
entry:
%0 = load i32, ptr @x1, align 4
Expand Down
Loading