Skip to content

Commit

Permalink
Add Transform for (and/or (eq/ne A,Pow2),(eq/ne A,-Pow2))->`(eq/ne …
Browse files Browse the repository at this point in the history
…(and (add A,Pow2),~(Pow2*2)), 0)`

In many instances this can be preferable if the `icmp` -> `i1` cannot be
done in one instruction (such as X86 for scalars).

At the moment guarded behind `TLI.isDesirableToCombineLogicOpOfSETCC`.

alive2 links:
https://alive2.llvm.org/ce/z/nLm5sN
https://alive2.llvm.org/ce/z/moEcyE

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D142344
  • Loading branch information
goldsteinn committed Feb 15, 2023
1 parent e29c439 commit 54a9e99
Show file tree
Hide file tree
Showing 5 changed files with 102 additions and 16 deletions.
17 changes: 17 additions & 0 deletions llvm/include/llvm/CodeGen/TargetLowering.h
Expand Up @@ -4002,6 +4002,23 @@ class TargetLowering : public TargetLoweringBase {
return true;
}

// Return true if it is desirable to try and optimize LogicOp(SETCC0, SETCC1).
// An example (what is implemented as of writing this) is:
// With C as a power of 2 and C != 0 and C != INT_MIN:
// (icmp eq A, C) | (icmp eq A, -C)
// -> (icmp eq and(add(A, C), ~(C + C)), 0)
// (icmp ne A, C) & (icmp ne A, -C)
// -> (icmp ne and(add(A, C), ~(C + C)), 0)
//
// The default implementation returns false.
//
// @param LogicOp the logic op
// @param SETCC0 the first of the SETCC nodes
// @param SETCC1 the second of the SETCC nodes
virtual bool isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp,
const SDNode *SETCC0,
const SDNode *SETCC1) const {
return false;
}

/// Return true if it is profitable to combine an XOR of a logical shift
/// to create a logical shift of NOT. This transformation may not be desirable
/// if it disrupts a particularly auspicious target-specific tree (e.g.
Expand Down
65 changes: 65 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
Expand Up @@ -5866,6 +5866,65 @@ SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
return SDValue();
}

/// Try to merge an AND/OR of two SETCC nodes into a single SETCC.
///
/// Currently handles, with C a power of 2, C != 0 and C != INT_MIN:
///   (icmp eq A, C) | (icmp eq A, -C)
///     -> (icmp eq and(add(A, C), ~(C + C)), 0)
///   (icmp ne A, C) & (icmp ne A, -C)
///     -> (icmp ne and(add(A, C), ~(C + C)), 0)
///
/// Returns an empty SDValue when the pattern does not match or when the target
/// declines through TargetLowering::isDesirableToCombineLogicOpOfSETCC.
static SDValue foldAndOrOfSETCC(SDNode *LogicOp, SelectionDAG &DAG) {
  const unsigned LogicOpc = LogicOp->getOpcode();
  assert((LogicOpc == ISD::AND || LogicOpc == ISD::OR) &&
         "Invalid Op to combine SETCC with");

  // TODO: Search past casts/truncates.
  SDValue Cmp0 = LogicOp->getOperand(0);
  SDValue Cmp1 = LogicOp->getOperand(1);
  if (Cmp0->getOpcode() != ISD::SETCC || Cmp1->getOpcode() != ISD::SETCC)
    return SDValue();

  // Let the target veto the combine before inspecting the compares further.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!TLI.isDesirableToCombineLogicOpOfSETCC(LogicOp, Cmp0.getNode(),
                                              Cmp1.getNode()))
    return SDValue();

  // Both compares must share one predicate: NE under AND, EQ under OR.
  ISD::CondCode Pred0 = cast<CondCodeSDNode>(Cmp0.getOperand(2))->get();
  ISD::CondCode Pred1 = cast<CondCodeSDNode>(Cmp1.getOperand(2))->get();
  const ISD::CondCode WantedPred =
      LogicOpc == ISD::AND ? ISD::SETNE : ISD::SETEQ;
  if (Pred0 != Pred1 || Pred0 != WantedPred)
    return SDValue();

  // Both compares must test the same value against a constant.
  SDValue Val = Cmp0->getOperand(0);
  auto *K0 = dyn_cast<ConstantSDNode>(Cmp0->getOperand(1));
  auto *K1 = dyn_cast<ConstantSDNode>(Cmp1->getOperand(1));
  if (Val != Cmp1->getOperand(0) || !K0 || !K1)
    return SDValue();

  // Only integer compares whose results feed nothing but this logic op.
  EVT OpVT = Val.getValueType();
  if (!OpVT.isInteger() || !Cmp0.hasOneUse() || !Cmp1.hasOneUse())
    return SDValue();

  // The two constants must be negations of each other.
  const APInt &C0 = K0->getAPIntValue();
  const APInt &C1 = K1->getAPIntValue();
  if (C0 != -C1)
    return SDValue();

  // Pick whichever constant is the power of 2, preferring the first compare's.
  // isPowerOf2 is only for non-zero powers of 2, so zero is rejected here too.
  const APInt *Pow2 = nullptr;
  if (C0.isPowerOf2())
    Pow2 = &C0;
  else if (C1.isPowerOf2())
    Pow2 = &C1;
  if (!Pow2 || Pow2->isMinSignedValue())
    return SDValue();

  // A + C is 0 when A == -C and 2C when A == C; clearing the 2C bit leaves
  // zero in exactly those two cases.
  SDLoc DL(LogicOp);
  SDValue Biased = DAG.getNode(ISD::ADD, DL, OpVT, Val,
                               DAG.getConstant(*Pow2, DL, OpVT));
  SDValue Masked = DAG.getNode(ISD::AND, DL, OpVT, Biased,
                               DAG.getConstant(~(*Pow2 + *Pow2), DL, OpVT));
  return DAG.getNode(ISD::SETCC, DL, LogicOp->getValueType(0), Masked,
                     DAG.getConstant(0, DL, OpVT), Cmp0.getOperand(2));
}

/// This contains all DAGCombine rules which reduce two values combined by
/// an And operation to a single value. This makes them reusable in the context
/// of visitSELECT(). Rules involving constants are not included as
Expand Down Expand Up @@ -6567,6 +6626,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), APInt::getAllOnes(BitWidth)))
return DAG.getConstant(0, SDLoc(N), VT);

if (SDValue R = foldAndOrOfSETCC(N, DAG))
return R;

if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;

Expand Down Expand Up @@ -7457,6 +7519,9 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
return N1;

if (SDValue R = foldAndOrOfSETCC(N, DAG))
return R;

if (SDValue Combined = visitORLike(N0, N1, N))
return Combined;

Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.cpp
Expand Up @@ -57124,6 +57124,12 @@ SDValue X86TargetLowering::expandIndirectJTBranch(const SDLoc& dl,
return TargetLowering::expandIndirectJTBranch(dl, Value, Addr, DAG);
}

bool X86TargetLowering::isDesirableToCombineLogicOpOfSETCC(
const SDNode *LogicOp, const SDNode *SETCC0, const SDNode *SETCC1) const {
EVT VT = LogicOp->getValueType(0);
return VT.isScalarInteger();
}

bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
EVT VT = Op.getValueType();
bool Is8BitMulByConstant = VT == MVT::i8 && Op.getOpcode() == ISD::MUL &&
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Target/X86/X86ISelLowering.h
Expand Up @@ -1058,6 +1058,12 @@ namespace llvm {
/// and some i16 instructions are slow.
bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;

/// Return true if the value produced by \p LogicOp has a scalar integer
/// type, in which case combining the logic op of \p SETCC0 and \p SETCC1 is
/// considered desirable.
bool
isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp,
const SDNode *SETCC0,
const SDNode *SETCC1) const override;

/// Return the newly negated expression if the cost is not expensive and
/// set the cost in \p Cost to indicate that if it is cheaper or neutral to
/// do the negation.
Expand Down
24 changes: 8 additions & 16 deletions llvm/test/CodeGen/X86/icmp-pow2-logic-npow2.ll
Expand Up @@ -12,20 +12,16 @@ define i1 @eq_pow_or(i32 %0) nounwind {
; X86-LABEL: eq_pow_or:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpl $32, %eax
; X86-NEXT: sete %cl
; X86-NEXT: cmpl $-32, %eax
; X86-NEXT: addl $32, %eax
; X86-NEXT: testl $-65, %eax
; X86-NEXT: sete %al
; X86-NEXT: orb %cl, %al
; X86-NEXT: retl
;
; X64-LABEL: eq_pow_or:
; X64: # %bb.0:
; X64-NEXT: cmpl $32, %edi
; X64-NEXT: sete %cl
; X64-NEXT: cmpl $-32, %edi
; X64-NEXT: addl $32, %edi
; X64-NEXT: testl $-65, %edi
; X64-NEXT: sete %al
; X64-NEXT: orb %cl, %al
; X64-NEXT: retq
%2 = icmp eq i32 %0, 32
%3 = icmp eq i32 %0, -32
Expand All @@ -37,20 +33,16 @@ define i1 @ne_pow_and(i8 %0) nounwind {
; X86-LABEL: ne_pow_and:
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT: cmpb $16, %al
; X86-NEXT: setne %cl
; X86-NEXT: cmpb $-16, %al
; X86-NEXT: addb $16, %al
; X86-NEXT: testb $-33, %al
; X86-NEXT: setne %al
; X86-NEXT: andb %cl, %al
; X86-NEXT: retl
;
; X64-LABEL: ne_pow_and:
; X64: # %bb.0:
; X64-NEXT: cmpb $16, %dil
; X64-NEXT: setne %cl
; X64-NEXT: cmpb $-16, %dil
; X64-NEXT: addb $16, %dil
; X64-NEXT: testb $-33, %dil
; X64-NEXT: setne %al
; X64-NEXT: andb %cl, %al
; X64-NEXT: retq
%2 = icmp ne i8 %0, 16
%3 = icmp ne i8 %0, -16
Expand Down

0 comments on commit 54a9e99

Please sign in to comment.