Skip to content

Commit

Permalink
[AArch64] Try to re-use extended operand for SETCC with vector ops.
Browse files Browse the repository at this point in the history
Try to re-use an already extended operand for SetCC with vector operands
feeding an extended select. Doing so avoids requiring another full
extension of the SET_CC result when lowering the select.

This improves lowering for certain extend/cmp/select patterns operating.
For example with  v16i8, this replaces 6 instructions for the extra extension
with 4 separate selects.

This improves the generated code for loops like the one below in
combination with D96522.

    int foo(uint8_t *p, int N) {
      unsigned long long sum = 0;
      for (int i = 0; i < N ; i++, p++) {
        unsigned int v = *p;
        sum += (v < 127) ? v : 256 - v;
      }
      return sum;
    }

https://clang.godbolt.org/z/Wco866MjY

On the AArch64 cores I have access to, the patch improves performance of
the vector loop by ~10%.

This could be generalized per follow-ups, but the initial version
targets one of the more important cases in combination with D96522.

Alive2 modeling:
* sext EQ https://alive2.llvm.org/ce/z/5upBvb
* sext NE https://alive2.llvm.org/ce/z/zbEcJp
* zext EQ https://alive2.llvm.org/ce/z/_xMwof
* zext NE https://alive2.llvm.org/ce/z/5FwKfc
* zext unsigned predicate: https://alive2.llvm.org/ce/z/iEwLU3
* sext signed predicate: https://alive2.llvm.org/ce/z/aMBega

Reviewed By: dmgreen

Differential Revision: https://reviews.llvm.org/D120481
  • Loading branch information
fhahn committed Jul 7, 2022
1 parent 0a9667b commit afdedd4
Show file tree
Hide file tree
Showing 2 changed files with 192 additions and 189 deletions.
51 changes: 51 additions & 0 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Expand Up @@ -18065,6 +18065,54 @@ static SDValue performCSELCombine(SDNode *N,
return performCONDCombine(N, DCI, DAG, 2, 3);
}

// Try to re-use an already extended operand of a vector SetCC feeding a
// extended select. Doing so avoids requiring another full extension of the
// SET_CC result when lowering the select.
static SDValue tryToWidenSetCCOperands(SDNode *Op, SelectionDAG &DAG) {
EVT Op0MVT = Op->getOperand(0).getValueType();
if (!Op0MVT.isVector() || Op->use_empty())
return SDValue();

// Make sure that all uses of Op are VSELECTs with result matching types where
// the result type has a larger element type than the SetCC operand.
SDNode *FirstUse = *Op->use_begin();
if (FirstUse->getOpcode() != ISD::VSELECT)
return SDValue();
EVT UseMVT = FirstUse->getValueType(0);
if (UseMVT.getScalarSizeInBits() <= Op0MVT.getScalarSizeInBits())
return SDValue();
if (any_of(Op->uses(), [&UseMVT](const SDNode *N) {
return N->getOpcode() != ISD::VSELECT || N->getValueType(0) != UseMVT;
}))
return SDValue();

APInt V;
if (!ISD::isConstantSplatVector(Op->getOperand(1).getNode(), V))
return SDValue();

SDLoc DL(Op);
SDValue Op0ExtV;
SDValue Op1ExtV;
ISD::CondCode CC = cast<CondCodeSDNode>(Op->getOperand(2))->get();
// Check if the first operand of the SET_CC is already extended. If it is,
// split the SET_CC and re-use the extended version of the operand.
SDNode *Op0SExt = DAG.getNodeIfExists(ISD::SIGN_EXTEND, DAG.getVTList(UseMVT),
Op->getOperand(0));
SDNode *Op0ZExt = DAG.getNodeIfExists(ISD::ZERO_EXTEND, DAG.getVTList(UseMVT),
Op->getOperand(0));
if (Op0SExt && (isSignedIntSetCC(CC) || isIntEqualitySetCC(CC))) {
Op0ExtV = SDValue(Op0SExt, 0);
Op1ExtV = DAG.getNode(ISD::SIGN_EXTEND, DL, UseMVT, Op->getOperand(1));
} else if (Op0ZExt && (isUnsignedIntSetCC(CC) || isIntEqualitySetCC(CC))) {
Op0ExtV = SDValue(Op0ZExt, 0);
Op1ExtV = DAG.getNode(ISD::ZERO_EXTEND, DL, UseMVT, Op->getOperand(1));
} else
return SDValue();

return DAG.getNode(ISD::SETCC, DL, UseMVT.changeVectorElementType(MVT::i1),
Op0ExtV, Op1ExtV, Op->getOperand(2));
}

static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) {
assert(N->getOpcode() == ISD::SETCC && "Unexpected opcode!");
SDValue LHS = N->getOperand(0);
Expand All @@ -18073,6 +18121,9 @@ static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) {
SDLoc DL(N);
EVT VT = N->getValueType(0);

if (SDValue V = tryToWidenSetCCOperands(N, DAG))
return V;

// setcc (csel 0, 1, cond, X), 1, ne ==> csel 0, 1, !cond, X
if (Cond == ISD::SETNE && isOneConstant(RHS) &&
LHS->getOpcode() == AArch64ISD::CSEL &&
Expand Down

0 comments on commit afdedd4

Please sign in to comment.