Skip to content

Commit

Permalink
[X86] Reuse EFLAGS and form LOCKed ops when only user is SETCC.
Browse files Browse the repository at this point in the history
We only generate LOCKed versions of add/sub when the result is unused.
It often happens that the result is used, but only by a comparison. We
can optimize those out by reusing EFLAGS, which lets us use the proper
instructions, instead of having to fallback to LXADD.

Instead of doing this as an MI peephole (as we do for the other
non-LOCKed (really, non-MR) forms), do it in ISel. It becomes quite
tricky later.

This also makes it eventually possible to stop expanding and/or/xor
if the only user is an icmp (also see D18141).

This uses the LOCK ISD opcodes added by r262244.

Differential Revision: http://reviews.llvm.org/D17633

llvm-svn: 265450
  • Loading branch information
ahmedbougacha committed Apr 5, 2016
1 parent b235d32 commit 50e6cd4
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 30 deletions.
91 changes: 76 additions & 15 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26134,6 +26134,56 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
return SDValue();
}

/// Combine:
/// (brcond/cmov/setcc .., (cmp (atomic_load_op ..), 0), cc)
/// to:
/// (brcond/cmov/setcc .., (LOCKed op ..), cc)
/// i.e., reusing the EFLAGS produced by the LOCKed instruction.
/// Note that this is only legal for some op/cc combinations.
static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode CC,
SelectionDAG &DAG) {
// This combine only operates on CMP-like nodes.
if (!(Cmp.getOpcode() == X86ISD::CMP ||
(Cmp.getOpcode() == X86ISD::SUB && !Cmp->hasAnyUseOfValue(0))))
return SDValue();

SDValue LHS = Cmp.getOperand(0);
SDValue RHS = Cmp.getOperand(1);

if (!LHS.hasOneUse())
return SDValue();

// FIXME: We can do this for XOR/OR/AND as well, but only if they survive
// AtomicExpand. Currently, we choose to expand them to cmpxchg if they
// have any users. Could we relax that to ignore (icmp x,0) users?
switch (LHS->getOpcode()) {
case ISD::ATOMIC_LOAD_ADD:
case ISD::ATOMIC_LOAD_SUB:
break;
default:
return SDValue();
}

auto *C = dyn_cast<ConstantSDNode>(RHS);
if (!C || C->getZExtValue() != 0)
return SDValue();

// Don't do this for all condition codes, as OF/CF are cleared by (CMP x,0)
// but might be set by arithmetic. Furthermore, we might later select INC/DEC,
// which don't modify CF (though CCs using CF should have been optimized out).
// SF/ZF are safe as they are set the same way.
// Note that in theory, the transformation is also valid for P/NP.
if (CC != X86::COND_E && CC != X86::COND_NE && CC != X86::COND_S &&
CC != X86::COND_NS)
return SDValue();

SDValue LockOp = lowerAtomicArithWithLOCK(LHS, DAG);
DAG.ReplaceAllUsesOfValueWith(LHS.getValue(0),
DAG.getUNDEF(LHS.getValueType()));
DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LockOp.getValue(1));
return LockOp;
}

// Check whether a boolean test is testing a boolean value generated by
// X86ISD::SETCC. If so, return the operand of that SETCC and proper condition
// code.
Expand Down Expand Up @@ -26305,6 +26355,16 @@ static bool checkBoolTestAndOrSetCCCombine(SDValue Cond, X86::CondCode &CC0,
return true;
}

/// Optimize an EFLAGS definition used according to the condition code \p CC
/// into a simpler EFLAGS value, potentially returning a new \p CC and replacing
/// uses of chain values.
static SDValue combineSetCCEFLAGS(SDValue EFLAGS, X86::CondCode &CC,
SelectionDAG &DAG) {
if (SDValue R = checkBoolTestSetCCCombine(EFLAGS, CC))
return R;
return combineSetCCAtomicArith(EFLAGS, CC, DAG);
}

/// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL]
static SDValue combineCMov(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
Expand All @@ -26331,15 +26391,14 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG,
}
}

SDValue Flags;

Flags = checkBoolTestSetCCCombine(Cond, CC);
if (Flags.getNode() &&
// Extra check as FCMOV only supports a subset of X86 cond.
(FalseOp.getValueType() != MVT::f80 || hasFPCMov(CC))) {
SDValue Ops[] = { FalseOp, TrueOp,
DAG.getConstant(CC, DL, MVT::i8), Flags };
return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), Ops);
// Try to simplify the EFLAGS and condition code operands.
// We can't always do this as FCMOV only supports a subset of X86 cond.
if (SDValue Flags = combineSetCCEFLAGS(Cond, CC, DAG)) {
if (FalseOp.getValueType() != MVT::f80 || hasFPCMov(CC)) {
SDValue Ops[] = {FalseOp, TrueOp, DAG.getConstant(CC, DL, MVT::i8),
Flags};
return DAG.getNode(X86ISD::CMOV, DL, N->getVTList(), Ops);
}
}

// If this is a select between two integer constants, try to do some
Expand Down Expand Up @@ -29265,7 +29324,8 @@ static SDValue combineX86SetCC(SDNode *N, SelectionDAG &DAG,
if (CC == X86::COND_B)
return MaterializeSETB(DL, EFLAGS, DAG, N->getSimpleValueType(0));

if (SDValue Flags = checkBoolTestSetCCCombine(EFLAGS, CC)) {
// Try to simplify the EFLAGS and condition code operands.
if (SDValue Flags = combineSetCCEFLAGS(EFLAGS, CC, DAG)) {
SDValue Cond = DAG.getConstant(CC, DL, MVT::i8);
return DAG.getNode(X86ISD::SETCC, DL, N->getVTList(), Cond, Flags);
}
Expand All @@ -29278,15 +29338,16 @@ static SDValue combineBrCond(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
SDLoc DL(N);
SDValue Chain = N->getOperand(0);
SDValue Dest = N->getOperand(1);
SDValue EFLAGS = N->getOperand(3);
X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(2));

if (SDValue Flags = checkBoolTestSetCCCombine(EFLAGS, CC)) {
// Try to simplify the EFLAGS and condition code operands.
// Make sure to not keep references to operands, as combineSetCCEFLAGS can
// RAUW them under us.
if (SDValue Flags = combineSetCCEFLAGS(EFLAGS, CC, DAG)) {
SDValue Cond = DAG.getConstant(CC, DL, MVT::i8);
return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), Chain, Dest, Cond,
Flags);
return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), N->getOperand(0),
N->getOperand(1), Cond, Flags);
}

return SDValue();
Expand Down
20 changes: 5 additions & 15 deletions llvm/test/CodeGen/X86/atomic-eflags-reuse.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,7 @@
define i8 @test_add_1_setcc_ne(i64* %p) #0 {
; CHECK-LABEL: test_add_1_setcc_ne:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: movl $1, %eax
; CHECK-NEXT: lock xaddq %rax, (%rdi)
; CHECK-NEXT: testq %rax, %rax
; CHECK-NEXT: lock incq (%rdi)
; CHECK-NEXT: setne %al
; CHECK-NEXT: retq
entry:
Expand All @@ -19,9 +17,7 @@ entry:
define i8 @test_sub_1_setcc_eq(i64* %p) #0 {
; CHECK-LABEL: test_sub_1_setcc_eq:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: movq $-1, %rax
; CHECK-NEXT: lock xaddq %rax, (%rdi)
; CHECK-NEXT: testq %rax, %rax
; CHECK-NEXT: lock decq (%rdi)
; CHECK-NEXT: sete %al
; CHECK-NEXT: retq
entry:
Expand Down Expand Up @@ -49,9 +45,7 @@ entry:
define i8 @test_sub_10_setcc_sge(i64* %p) #0 {
; CHECK-LABEL: test_sub_10_setcc_sge:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: movq $-10, %rax
; CHECK-NEXT: lock xaddq %rax, (%rdi)
; CHECK-NEXT: testq %rax, %rax
; CHECK-NEXT: lock addq $-10, (%rdi)
; CHECK-NEXT: setns %al
; CHECK-NEXT: retq
entry:
Expand All @@ -66,9 +60,7 @@ entry:
define i32 @test_add_10_brcond_sge(i64* %p, i32 %a0, i32 %a1) #0 {
; CHECK-LABEL: test_add_10_brcond_sge:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: movl $10, %eax
; CHECK-NEXT: lock xaddq %rax, (%rdi)
; CHECK-NEXT: testq %rax, %rax
; CHECK-NEXT: lock addq $10, (%rdi)
; CHECK-NEXT: js .LBB4_2
; CHECK-NEXT: # BB#1: # %t
; CHECK-NEXT: movl %esi, %eax
Expand All @@ -89,9 +81,7 @@ f:
define i32 @test_sub_1_cmov_slt(i64* %p, i32 %a0, i32 %a1) #0 {
; CHECK-LABEL: test_sub_1_cmov_slt:
; CHECK: # BB#0: # %entry
; CHECK-NEXT: movq $-1, %rax
; CHECK-NEXT: lock xaddq %rax, (%rdi)
; CHECK-NEXT: testq %rax, %rax
; CHECK-NEXT: lock decq (%rdi)
; CHECK-NEXT: cmovnsl %edx, %esi
; CHECK-NEXT: movl %esi, %eax
; CHECK-NEXT: retq
Expand Down

0 comments on commit 50e6cd4

Please sign in to comment.