Skip to content

Commit

Permalink
[x86] improve CMOV codegen by pushing add into operands
Browse files Browse the repository at this point in the history
This is not the transform direction we want in general,
but by the time we have a CMOV, we've already tried
everything else that could be better.
The transform increases the uses of the other add operand,
but that is safe according to Alive2:
https://alive2.llvm.org/ce/z/Yn6p-A

We could probably extend this to other binops (not just add).
This is the motivating pattern discussed in:
https://llvm.org/PR51069

The test with i8 shows a missed fold because there's a trunc
sitting in front of the add. That can be handled with a small
follow-up.

Differential Revision: https://reviews.llvm.org/D106607
  • Loading branch information
rotateright committed Jul 23, 2021
1 parent 028eb43 commit f060aa1
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 12 deletions.
38 changes: 38 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49867,13 +49867,51 @@ static SDValue matchPMADDWD_2(SelectionDAG &DAG, SDValue N0, SDValue N1,
PMADDBuilder);
}

/// A CMOV that selects between two constants needs both constants materialized
/// in registers. When such a CMOV feeds an 'add', distribute the add over the
/// two CMOV value operands so the constants can be folded into the add itself,
/// which lowers the instruction count. This is done on X86ISD::CMOV rather than
/// the generic 'select' because earlier folds may already have turned a
/// select-of-constants into logic hacks.
///
/// \param N   The node whose two operands are inspected; the caller in this
///            file invokes this from the ISD::ADD combine.
/// \param DAG The SelectionDAG used to create the replacement nodes.
/// \returns The replacement CMOV, or an empty SDValue if neither operand of
///          \p N is a suitable CMOV.
static SDValue pushAddIntoCmovOfConsts(SDNode *N, SelectionDAG &DAG) {
  // A candidate is a single-use CMOV whose first two operands are both
  // constants, at least one of which is zero. The zero requirement exists
  // because add-of-0 gets simplified away, so one side costs nothing extra.
  // TODO: Allow generating an extra add?
  auto isSuitableCmov = [](SDValue V) {
    if (V.getOpcode() != X86ISD::CMOV || !V.hasOneUse())
      return false;
    SDValue C0 = V.getOperand(0);
    SDValue C1 = V.getOperand(1);
    if (!isa<ConstantSDNode>(C0) || !isa<ConstantSDNode>(C1))
      return false;
    return isNullConstant(C0) || isNullConstant(C1);
  };

  // The CMOV may sit on either side of the add; prefer operand 0 and fall
  // back to operand 1.
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue Cmov;
  SDValue OtherOp;
  if (isSuitableCmov(LHS)) {
    Cmov = LHS;
    OtherOp = RHS;
  } else if (isSuitableCmov(RHS)) {
    Cmov = RHS;
    OtherOp = LHS;
  } else {
    return SDValue();
  }

  // add (cmov C, 0), OtherOp --> cmov (add OtherOp, C), OtherOp
  // add (cmov 0, C), OtherOp --> cmov OtherOp, (add OtherOp, C)
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SDValue NewFalse =
      DAG.getNode(ISD::ADD, DL, VT, OtherOp, Cmov.getOperand(0));
  SDValue NewTrue =
      DAG.getNode(ISD::ADD, DL, VT, OtherOp, Cmov.getOperand(1));

  // Operands 2 and 3 of the original CMOV are forwarded unchanged.
  return DAG.getNode(X86ISD::CMOV, DL, VT, NewFalse, NewTrue,
                     Cmov.getOperand(2), Cmov.getOperand(3));
}

static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);

if (SDValue Select = pushAddIntoCmovOfConsts(N, DAG))
return Select;

if (SDValue MAdd = matchPMADDWD(DAG, Op0, Op1, SDLoc(N), VT, Subtarget))
return MAdd;
if (SDValue MAdd = matchPMADDWD_2(DAG, Op0, Op1, SDLoc(N), VT, Subtarget))
Expand Down
20 changes: 8 additions & 12 deletions llvm/test/CodeGen/X86/add-cmov.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,9 @@
define i64 @select_consts_i64(i64 %offset, i32 %x) {
; CHECK-LABEL: select_consts_i64:
; CHECK: # %bb.0:
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: leaq 42(%rdi), %rax
; CHECK-NEXT: testl %esi, %esi
; CHECK-NEXT: movl $42, %eax
; CHECK-NEXT: cmovneq %rcx, %rax
; CHECK-NEXT: addq %rdi, %rax
; CHECK-NEXT: cmovneq %rdi, %rax
; CHECK-NEXT: retq
%b = icmp eq i32 %x, 0
%s = select i1 %b, i64 42, i64 0
Expand All @@ -19,11 +17,10 @@ define i64 @select_consts_i64(i64 %offset, i32 %x) {
define i32 @select_consts_i32(i32 %offset, i64 %x) {
; CHECK-LABEL: select_consts_i32:
; CHECK: # %bb.0:
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
; CHECK-NEXT: leal 43(%rdi), %eax
; CHECK-NEXT: cmpq $42, %rsi
; CHECK-NEXT: movl $43, %eax
; CHECK-NEXT: cmovgel %ecx, %eax
; CHECK-NEXT: addl %edi, %eax
; CHECK-NEXT: cmovgel %edi, %eax
; CHECK-NEXT: retq
%b = icmp sgt i64 %x, 41
%s = select i1 %b, i32 0, i32 43
Expand All @@ -34,11 +31,10 @@ define i32 @select_consts_i32(i32 %offset, i64 %x) {
define i16 @select_consts_i16(i16 %offset, i1 %b) {
; CHECK-LABEL: select_consts_i16:
; CHECK: # %bb.0:
; CHECK-NEXT: xorl %ecx, %ecx
; CHECK-NEXT: # kill: def $edi killed $edi def $rdi
; CHECK-NEXT: leal 44(%rdi), %eax
; CHECK-NEXT: testb $1, %sil
; CHECK-NEXT: movl $44, %eax
; CHECK-NEXT: cmovel %ecx, %eax
; CHECK-NEXT: addl %edi, %eax
; CHECK-NEXT: cmovel %edi, %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
%s = select i1 %b, i16 44, i16 0
Expand Down

0 comments on commit f060aa1

Please sign in to comment.