Skip to content

Commit

Permalink
[X86] combineAddOrSubToADCOrSBB - Fold ADD/SUB + AND(SRL(X,Y),1) -> …
Browse files Browse the repository at this point in the history
…ADC/SBB+BT(X,Y) (REAPPLIED)

As suggested on PR35908, if we are adding/subtracting an extracted bit, attempt to use BT instead to fold the op and use an ADC/SBB op.

Reapplied with extra type legality checks: LowerAndToBT was originally only used during lowering, but now that it can be called earlier we might encounter illegal types, which we either promote to i32 or bail out on.

Differential Revision: https://reviews.llvm.org/D122084
  • Loading branch information
RKSimon committed Mar 21, 2022
1 parent d137528 commit 438ac28
Show file tree
Hide file tree
Showing 2 changed files with 179 additions and 259 deletions.
41 changes: 28 additions & 13 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23523,9 +23523,8 @@ X86TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,

/// Result of 'and' is compared against zero. Change to a BT node if possible.
/// Returns the BT node and the condition code needed to use it.
static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
const SDLoc &dl, SelectionDAG &DAG,
SDValue &X86CC) {
static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC, const SDLoc &dl,
SelectionDAG &DAG, X86::CondCode &X86CC) {
assert(And.getOpcode() == ISD::AND && "Expected AND node!");
SDValue Op0 = And.getOperand(0);
SDValue Op1 = And.getOperand(1);
Expand Down Expand Up @@ -23587,9 +23586,13 @@ static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
// that doing a bittest on the i32 value is ok. We extend to i32 because
// the encoding for the i16 version is larger than the i32 version.
// Also promote i16 to i32 for performance / code size reason.
if (Src.getValueType() == MVT::i8 || Src.getValueType() == MVT::i16)
if (Src.getValueType().getScalarSizeInBits() < 32)
Src = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Src);

// No legal type found, give up.
if (!DAG.getTargetLoweringInfo().isTypeLegal(Src.getValueType()))
return SDValue();

// See if we can use the 32-bit instruction instead of the 64-bit one for a
// shorter encoding. Since the former takes the modulo 32 of BitNo and the
// latter takes the modulo 64, this is only valid if the 5th bit of BitNo is
Expand All @@ -23603,8 +23606,7 @@ static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
if (Src.getValueType() != BitNo.getValueType())
BitNo = DAG.getNode(ISD::ANY_EXTEND, dl, Src.getValueType(), BitNo);

X86CC = DAG.getTargetConstant(CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B,
dl, MVT::i8);
X86CC = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
return DAG.getNode(X86ISD::BT, dl, MVT::i32, Src, BitNo);
}

Expand Down Expand Up @@ -24310,8 +24312,11 @@ SDValue X86TargetLowering::emitFlagsForSetcc(SDValue Op0, SDValue Op1,
// Lower ((X >>s N) & 1) != 0 to BT(X, N).
if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() && isNullConstant(Op1) &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
if (SDValue BT = LowerAndToBT(Op0, CC, dl, DAG, X86CC))
X86::CondCode X86CondCode;
if (SDValue BT = LowerAndToBT(Op0, CC, dl, DAG, X86CondCode)) {
X86CC = DAG.getTargetConstant(X86CondCode, dl, MVT::i8);
return BT;
}
}

// Try to use PTEST/PMOVMSKB for a tree ORs equality compared with 0.
Expand Down Expand Up @@ -24783,9 +24788,9 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
// We know the result of AND is compared against zero. Try to match
// it to BT.
if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
SDValue BTCC;
if (SDValue BT = LowerAndToBT(Cond, ISD::SETNE, DL, DAG, BTCC)) {
CC = BTCC;
X86::CondCode X86CondCode;
if (SDValue BT = LowerAndToBT(Cond, ISD::SETNE, DL, DAG, X86CondCode)) {
CC = DAG.getTargetConstant(X86CondCode, DL, MVT::i8);
Cond = BT;
AddTest = false;
}
Expand Down Expand Up @@ -52294,6 +52299,7 @@ static SDValue combineADC(SDNode *N, SelectionDAG &DAG,
/// If this is an add or subtract where one operand is produced by a cmp+setcc,
/// then try to convert it to an ADC or SBB. This replaces TEST+SET+{ADD/SUB}
/// with CMP+{ADC, SBB}.
/// Also try (ADD/SUB)+(AND(SRL,1)) bit extraction pattern with BT+{ADC, SBB}.
static SDValue combineAddOrSubToADCOrSBB(bool IsSub, const SDLoc &DL, EVT VT,
SDValue X, SDValue Y,
SelectionDAG &DAG) {
Expand All @@ -52304,11 +52310,20 @@ static SDValue combineAddOrSubToADCOrSBB(bool IsSub, const SDLoc &DL, EVT VT,
if (Y.getOpcode() == ISD::ZERO_EXTEND && Y.hasOneUse())
Y = Y.getOperand(0);

if (Y.getOpcode() != X86ISD::SETCC || !Y.hasOneUse())
if (!Y.hasOneUse())
return SDValue();

X86::CondCode CC = (X86::CondCode)Y.getConstantOperandVal(0);
SDValue EFLAGS = Y.getOperand(1);
X86::CondCode CC;
SDValue EFLAGS;
if (Y.getOpcode() == X86ISD::SETCC) {
CC = (X86::CondCode)Y.getConstantOperandVal(0);
EFLAGS = Y.getOperand(1);
} else if (Y.getOpcode() == ISD::AND && isOneConstant(Y.getOperand(1))) {
EFLAGS = LowerAndToBT(Y, ISD::SETNE, DL, DAG, CC);
}

if (!EFLAGS)
return SDValue();

// If X is -1 or 0, then we have an opportunity to avoid constants required in
// the general case below.
Expand Down
Loading

0 comments on commit 438ac28

Please sign in to comment.