Skip to content

Commit

Permalink
[X86] Remove X86ISD::INC/DEC. Just select them from X86ISD::ADD/SUB a…
Browse files Browse the repository at this point in the history
…t isel time

INC/DEC are pretty much the same as ADD/SUB except that they don't update the C flag.

This patch removes the special nodes and just pattern matches from ADD/SUB during isel if the C flag isn't being used.

I had to avoid selecting DEC is the result isn't used. This will become a SUB immediate which will turned into a CMP later by optimizeCompareInstr. This lead to the one test change where we use a CMP instead of a DEC for an overflow intrinsic since we only checked the flag.

This also exposed a hole in our RMW flag matching use of hasNoCarryFlagUses. Our root node for the match is a store and there's no guarantee that all the flag users have been selected yet. So hasNoCarryFlagUses needs to check copyToReg and machine opcodes, but it also needs to check for the pre-match SETCC, SETCC_CARRY, BRCOND, and CMOV opcodes.

Differential Revision: https://reviews.llvm.org/D55975

llvm-svn: 350245
  • Loading branch information
topperc committed Jan 2, 2019
1 parent 10ac299 commit 9d4860e
Show file tree
Hide file tree
Showing 8 changed files with 178 additions and 210 deletions.
18 changes: 6 additions & 12 deletions llvm/lib/Target/X86/X86FastISel.cpp
Expand Up @@ -2900,23 +2900,15 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
isCommutativeIntrinsic(II))
std::swap(LHS, RHS);

bool UseIncDec = false;
if (isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isOne())
UseIncDec = true;

unsigned BaseOpc, CondOpc;
switch (II->getIntrinsicID()) {
default: llvm_unreachable("Unexpected intrinsic!");
case Intrinsic::sadd_with_overflow:
BaseOpc = UseIncDec ? unsigned(X86ISD::INC) : unsigned(ISD::ADD);
CondOpc = X86::SETOr;
break;
BaseOpc = ISD::ADD; CondOpc = X86::SETOr; break;
case Intrinsic::uadd_with_overflow:
BaseOpc = ISD::ADD; CondOpc = X86::SETBr; break;
case Intrinsic::ssub_with_overflow:
BaseOpc = UseIncDec ? unsigned(X86ISD::DEC) : unsigned(ISD::SUB);
CondOpc = X86::SETOr;
break;
BaseOpc = ISD::SUB; CondOpc = X86::SETOr; break;
case Intrinsic::usub_with_overflow:
BaseOpc = ISD::SUB; CondOpc = X86::SETBr; break;
case Intrinsic::smul_with_overflow:
Expand All @@ -2938,9 +2930,11 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
{ X86::DEC8r, X86::DEC16r, X86::DEC32r, X86::DEC64r }
};

if (BaseOpc == X86ISD::INC || BaseOpc == X86ISD::DEC) {
if (CI->isOne() && (BaseOpc == ISD::ADD || BaseOpc == ISD::SUB) &&
CondOpc == X86::SETOr) {
// We can use INC/DEC.
ResultReg = createResultReg(TLI.getRegClassFor(VT));
bool IsDec = BaseOpc == X86ISD::DEC;
bool IsDec = BaseOpc == ISD::SUB;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Opc[IsDec][VT.SimpleTy-MVT::i8]), ResultReg)
.addReg(LHSReg, getKillRegState(LHSIsKill));
Expand Down
114 changes: 74 additions & 40 deletions llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
Expand Up @@ -2327,6 +2327,22 @@ bool X86DAGToDAGISel::hasNoSignFlagUses(SDValue Flags) const {
return true;
}

static bool mayUseCarryFlag(X86::CondCode CC) {
switch (CC) {
// Comparisons which don't examine the CF flag.
case X86::COND_O: case X86::COND_NO:
case X86::COND_E: case X86::COND_NE:
case X86::COND_S: case X86::COND_NS:
case X86::COND_P: case X86::COND_NP:
case X86::COND_L: case X86::COND_GE:
case X86::COND_G: case X86::COND_LE:
return false;
// Anything else: assume conservatively.
default:
return true;
}
}

/// Test whether the given node which sets flags has any uses which require the
/// CF flag to be accurate.
bool X86DAGToDAGISel::hasNoCarryFlagUses(SDValue Flags) const {
Expand All @@ -2336,36 +2352,49 @@ bool X86DAGToDAGISel::hasNoSignFlagUses(SDValue Flags) const {
// Only check things that use the flags.
if (UI.getUse().getResNo() != Flags.getResNo())
continue;
// Only examine CopyToReg uses that copy to EFLAGS.
if (UI->getOpcode() != ISD::CopyToReg ||
cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
return false;
// Examine each user of the CopyToReg use.
for (SDNode::use_iterator FlagUI = UI->use_begin(), FlagUE = UI->use_end();
FlagUI != FlagUE; ++FlagUI) {
// Only examine the Flag result.
if (FlagUI.getUse().getResNo() != 1)
continue;
// Anything unusual: assume conservatively.
if (!FlagUI->isMachineOpcode())
return false;
// Examine the condition code of the user.
X86::CondCode CC = getCondFromOpc(FlagUI->getMachineOpcode());

switch (CC) {
// Comparisons which don't examine the CF flag.
case X86::COND_O: case X86::COND_NO:
case X86::COND_E: case X86::COND_NE:
case X86::COND_S: case X86::COND_NS:
case X86::COND_P: case X86::COND_NP:
case X86::COND_L: case X86::COND_GE:
case X86::COND_G: case X86::COND_LE:
continue;
// Anything else: assume conservatively.
default:
unsigned UIOpc = UI->getOpcode();

if (UIOpc == ISD::CopyToReg) {
// Only examine CopyToReg uses that copy to EFLAGS.
if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS)
return false;
// Examine each user of the CopyToReg use.
for (SDNode::use_iterator FlagUI = UI->use_begin(), FlagUE = UI->use_end();
FlagUI != FlagUE; ++FlagUI) {
// Only examine the Flag result.
if (FlagUI.getUse().getResNo() != 1)
continue;
// Anything unusual: assume conservatively.
if (!FlagUI->isMachineOpcode())
return false;
// Examine the condition code of the user.
X86::CondCode CC = getCondFromOpc(FlagUI->getMachineOpcode());

if (mayUseCarryFlag(CC))
return false;
}

// This CopyToReg is ok. Move on to the next user.
continue;
}

// This might be an unselected node. So look for the pre-isel opcodes that
// use flags.
unsigned CCOpNo;
switch (UIOpc) {
default:
// Something unusual. Be conservative.
return false;
case X86ISD::SETCC: CCOpNo = 0; break;
case X86ISD::SETCC_CARRY: CCOpNo = 0; break;
case X86ISD::CMOV: CCOpNo = 2; break;
case X86ISD::BRCOND: CCOpNo = 2; break;
}

X86::CondCode CC = (X86::CondCode)UI->getConstantOperandVal(CCOpNo);
if (mayUseCarryFlag(CC))
return false;
}
return true;
}
Expand Down Expand Up @@ -2521,8 +2550,6 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {
switch (Opc) {
default:
return false;
case X86ISD::INC:
case X86ISD::DEC:
case X86ISD::SUB:
case X86ISD::SBB:
break;
Expand Down Expand Up @@ -2573,20 +2600,27 @@ bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) {

MachineSDNode *Result;
switch (Opc) {
case X86ISD::INC:
case X86ISD::DEC: {
unsigned NewOpc =
Opc == X86ISD::INC
? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m)
: SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m);
const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
Result =
CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other, Ops);
break;
}
case X86ISD::ADD:
case X86ISD::ADC:
case X86ISD::SUB:
// Try to match inc/dec.
if (!Subtarget->slowIncDec() ||
CurDAG->getMachineFunction().getFunction().optForSize()) {
bool IsOne = isOneConstant(StoredVal.getOperand(1));
bool IsNegOne = isAllOnesConstant(StoredVal.getOperand(1));
// ADD/SUB with 1/-1 and carry flag isn't used can use inc/dec.
if ((IsOne || IsNegOne) && hasNoCarryFlagUses(StoredVal.getValue(1))) {
unsigned NewOpc =
((Opc == X86ISD::ADD) == IsOne)
? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m)
: SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m);
const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain};
Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32,
MVT::Other, Ops);
break;
}
}
LLVM_FALLTHROUGH;
case X86ISD::ADC:
case X86ISD::SBB:
case X86ISD::AND:
case X86ISD::OR:
Expand Down
101 changes: 8 additions & 93 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Expand Up @@ -18644,56 +18644,20 @@ static SDValue EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
// which may be the result of a CAST. We use the variable 'Op', which is the
// non-casted variable when we check for possible users.
switch (ArithOp.getOpcode()) {
case ISD::ADD:
// We only want to rewrite this as a target-specific node with attached
// flags if there is a reasonable chance of either using that to do custom
// instructions selection that can fold some of the memory operands, or if
// only the flags are used. If there are other uses, leave the node alone
// and emit a test instruction.
for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
UE = Op.getNode()->use_end(); UI != UE; ++UI)
if (UI->getOpcode() != ISD::CopyToReg &&
UI->getOpcode() != ISD::SETCC &&
UI->getOpcode() != ISD::STORE)
goto default_case;

if (auto *C = dyn_cast<ConstantSDNode>(ArithOp.getOperand(1))) {
// An add of one will be selected as an INC.
if (C->isOne() &&
(!Subtarget.slowIncDec() ||
DAG.getMachineFunction().getFunction().optForSize())) {
Opcode = X86ISD::INC;
NumOperands = 1;
break;
}

// An add of negative one (subtract of one) will be selected as a DEC.
if (C->isAllOnesValue() &&
(!Subtarget.slowIncDec() ||
DAG.getMachineFunction().getFunction().optForSize())) {
Opcode = X86ISD::DEC;
NumOperands = 1;
break;
}
}

// Otherwise use a regular EFLAGS-setting add.
Opcode = X86ISD::ADD;
NumOperands = 2;
break;

case ISD::AND:
// If the primary 'and' result isn't used, don't bother using X86ISD::AND,
// because a TEST instruction will be better.
if (!hasNonFlagsUse(Op))
break;

LLVM_FALLTHROUGH;
case ISD::ADD:
case ISD::SUB:
case ISD::OR:
case ISD::XOR:
// Similar to ISD::ADD above, check if the uses will preclude useful
// lowering of the target-specific node.
// Transform to an x86-specific ALU node with flags if there is a chance of
// using an RMW op or only the flags are used. Otherwise, leave
// the node alone and emit a 'test' instruction.
for (SDNode::use_iterator UI = Op.getNode()->use_begin(),
UE = Op.getNode()->use_end(); UI != UE; ++UI)
if (UI->getOpcode() != ISD::CopyToReg &&
Expand All @@ -18704,6 +18668,7 @@ static SDValue EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
// Otherwise use a regular EFLAGS-setting instruction.
switch (ArithOp.getOpcode()) {
default: llvm_unreachable("unexpected operator!");
case ISD::ADD: Opcode = X86ISD::ADD; break;
case ISD::SUB: Opcode = X86ISD::SUB; break;
case ISD::XOR: Opcode = X86ISD::XOR; break;
case ISD::AND: Opcode = X86ISD::AND; break;
Expand All @@ -18714,8 +18679,6 @@ static SDValue EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
break;
case X86ISD::ADD:
case X86ISD::SUB:
case X86ISD::INC:
case X86ISD::DEC:
case X86ISD::OR:
case X86ISD::XOR:
case X86ISD::AND:
Expand Down Expand Up @@ -19603,13 +19566,6 @@ getX86XALUOOp(X86::CondCode &Cond, SDValue Op, SelectionDAG &DAG) {
switch (Op.getOpcode()) {
default: llvm_unreachable("Unknown ovf instruction!");
case ISD::SADDO:
// A subtract of one will be selected as a INC. Note that INC doesn't
// set CF, so we can't do this for UADDO.
if (isOneConstant(RHS)) {
BaseOp = X86ISD::INC;
Cond = X86::COND_O;
break;
}
BaseOp = X86ISD::ADD;
Cond = X86::COND_O;
break;
Expand All @@ -19618,13 +19574,6 @@ getX86XALUOOp(X86::CondCode &Cond, SDValue Op, SelectionDAG &DAG) {
Cond = X86::COND_B;
break;
case ISD::SSUBO:
// A subtract of one will be selected as a DEC. Note that DEC doesn't
// set CF, so we can't do this for USUBO.
if (isOneConstant(RHS)) {
BaseOp = X86ISD::DEC;
Cond = X86::COND_O;
break;
}
BaseOp = X86ISD::SUB;
Cond = X86::COND_O;
break;
Expand Down Expand Up @@ -19675,8 +19624,7 @@ static bool isX86LogicalCmp(SDValue Op) {
if (Op.getResNo() == 1 &&
(Opc == X86ISD::ADD || Opc == X86ISD::SUB || Opc == X86ISD::ADC ||
Opc == X86ISD::SBB || Opc == X86ISD::SMUL || Opc == X86ISD::UMUL ||
Opc == X86ISD::INC || Opc == X86ISD::DEC || Opc == X86ISD::OR ||
Opc == X86ISD::XOR || Opc == X86ISD::AND))
Opc == X86ISD::OR || Opc == X86ISD::XOR || Opc == X86ISD::AND))
return true;

return false;
Expand Down Expand Up @@ -25511,8 +25459,7 @@ static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget,
}

static SDValue lowerAtomicArithWithLOCK(SDValue N, SelectionDAG &DAG,
const X86Subtarget &Subtarget,
bool AllowIncDec = true) {
const X86Subtarget &Subtarget) {
unsigned NewOpc = 0;
switch (N->getOpcode()) {
case ISD::ATOMIC_LOAD_ADD:
Expand All @@ -25536,25 +25483,6 @@ static SDValue lowerAtomicArithWithLOCK(SDValue N, SelectionDAG &DAG,

MachineMemOperand *MMO = cast<MemSDNode>(N)->getMemOperand();

if (auto *C = dyn_cast<ConstantSDNode>(N->getOperand(2))) {
// Convert to inc/dec if they aren't slow or we are optimizing for size.
if (AllowIncDec && (!Subtarget.slowIncDec() ||
DAG.getMachineFunction().getFunction().optForSize())) {
if ((NewOpc == X86ISD::LADD && C->isOne()) ||
(NewOpc == X86ISD::LSUB && C->isAllOnesValue()))
return DAG.getMemIntrinsicNode(X86ISD::LINC, SDLoc(N),
DAG.getVTList(MVT::i32, MVT::Other),
{N->getOperand(0), N->getOperand(1)},
/*MemVT=*/N->getSimpleValueType(0), MMO);
if ((NewOpc == X86ISD::LSUB && C->isOne()) ||
(NewOpc == X86ISD::LADD && C->isAllOnesValue()))
return DAG.getMemIntrinsicNode(X86ISD::LDEC, SDLoc(N),
DAG.getVTList(MVT::i32, MVT::Other),
{N->getOperand(0), N->getOperand(1)},
/*MemVT=*/N->getSimpleValueType(0), MMO);
}
}

return DAG.getMemIntrinsicNode(
NewOpc, SDLoc(N), DAG.getVTList(MVT::i32, MVT::Other),
{N->getOperand(0), N->getOperand(1), N->getOperand(2)},
Expand Down Expand Up @@ -27034,8 +26962,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::LOR: return "X86ISD::LOR";
case X86ISD::LXOR: return "X86ISD::LXOR";
case X86ISD::LAND: return "X86ISD::LAND";
case X86ISD::LINC: return "X86ISD::LINC";
case X86ISD::LDEC: return "X86ISD::LDEC";
case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL";
case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD";
case X86ISD::VTRUNC: return "X86ISD::VTRUNC";
Expand Down Expand Up @@ -27073,8 +26999,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::SBB: return "X86ISD::SBB";
case X86ISD::SMUL: return "X86ISD::SMUL";
case X86ISD::UMUL: return "X86ISD::UMUL";
case X86ISD::INC: return "X86ISD::INC";
case X86ISD::DEC: return "X86ISD::DEC";
case X86ISD::OR: return "X86ISD::OR";
case X86ISD::XOR: return "X86ISD::XOR";
case X86ISD::AND: return "X86ISD::AND";
Expand Down Expand Up @@ -34297,16 +34221,7 @@ static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
/*Chain*/ CmpLHS.getOperand(0), /*LHS*/ CmpLHS.getOperand(1),
/*RHS*/ DAG.getConstant(-Addend, SDLoc(CmpRHS), CmpRHS.getValueType()),
AN->getMemOperand());
// If the comparision uses the CF flag we can't use INC/DEC instructions.
bool NeedCF = false;
switch (CC) {
default: break;
case X86::COND_A: case X86::COND_AE:
case X86::COND_B: case X86::COND_BE:
NeedCF = true;
break;
}
auto LockOp = lowerAtomicArithWithLOCK(AtomicSub, DAG, Subtarget, !NeedCF);
auto LockOp = lowerAtomicArithWithLOCK(AtomicSub, DAG, Subtarget);
DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(0),
DAG.getUNDEF(CmpLHS.getValueType()));
DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(1), LockOp.getValue(1));
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/X86/X86ISelLowering.h
Expand Up @@ -337,7 +337,7 @@ namespace llvm {

// Arithmetic operations with FLAGS results.
ADD, SUB, ADC, SBB, SMUL, UMUL,
INC, DEC, OR, XOR, AND,
OR, XOR, AND,

// Bit field extract.
BEXTR,
Expand Down Expand Up @@ -568,7 +568,7 @@ namespace llvm {

/// LOCK-prefixed arithmetic read-modify-write instructions.
/// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
LADD, LSUB, LOR, LXOR, LAND, LINC, LDEC,
LADD, LSUB, LOR, LXOR, LAND,

// Load, scalar_to_vector, and zero extend.
VZEXT_LOAD,
Expand Down

0 comments on commit 9d4860e

Please sign in to comment.