diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index f0813f1f7c61d..5878cda3c5a92 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -14835,6 +14835,59 @@ static SDValue performAddUADDVCombine(SDNode *N, SelectionDAG &DAG) {
                      DAG.getConstant(0, DL, MVT::i64));
 }
 
+/// Perform the scalar expression combine in the form of:
+///   CSEL (c, 1, cc) + b => CSINC(b+c, b, cc)
+static SDValue performAddCSelIntoCSinc(SDNode *N, SelectionDAG &DAG) {
+  EVT VT = N->getValueType(0);
+  if (!VT.isScalarInteger() || N->getOpcode() != ISD::ADD)
+    return SDValue();
+
+  SDValue CSel = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+
+  // Handle commutativity.
+  if (CSel.getOpcode() != AArch64ISD::CSEL) {
+    std::swap(CSel, RHS);
+    if (CSel.getOpcode() != AArch64ISD::CSEL) {
+      return SDValue();
+    }
+  }
+
+  if (!CSel.hasOneUse())
+    return SDValue();
+
+  AArch64CC::CondCode AArch64CC =
+      static_cast<AArch64CC::CondCode>(CSel.getConstantOperandVal(2));
+
+  // The CSEL should include a const one operand.
+  ConstantSDNode *CTVal = dyn_cast<ConstantSDNode>(CSel.getOperand(0));
+  ConstantSDNode *CFVal = dyn_cast<ConstantSDNode>(CSel.getOperand(1));
+  if (!CTVal || !CFVal || (!CTVal->isOne() && !CFVal->isOne()))
+    return SDValue();
+
+  // switch CSEL (1, c, cc) to CSEL (c, 1, !cc)
+  if (CTVal->isOne() && !CFVal->isOne()) {
+    std::swap(CTVal, CFVal);
+    AArch64CC = AArch64CC::getInvertedCondCode(AArch64CC);
+  }
+
+  // It might be neutral for larger constants, as the immediate needs to be
+  // materialized in a register.
+  APInt ADDC = CTVal->getAPIntValue();
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (!TLI.isLegalAddImmediate(ADDC.getSExtValue()))
+    return SDValue();
+
+  assert(CFVal->isOne() && "Unexpected constant value");
+
+  SDLoc DL(N);
+  SDValue NewNode = DAG.getNode(ISD::ADD, DL, VT, RHS, SDValue(CTVal, 0));
+  SDValue CCVal = DAG.getConstant(AArch64CC, DL, MVT::i32);
+  SDValue Cmp = CSel.getOperand(3);
+
+  return DAG.getNode(AArch64ISD::CSINC, DL, VT, NewNode, RHS, CCVal, Cmp);
+}
+
 // ADD(UDOT(zero, x, y), A) --> UDOT(A, x, y)
 static SDValue performAddDotCombine(SDNode *N, SelectionDAG &DAG) {
   EVT VT = N->getValueType(0);
@@ -14919,6 +14972,8 @@ static SDValue performAddSubCombine(SDNode *N,
     return Val;
   if (SDValue Val = performAddDotCombine(N, DAG))
     return Val;
+  if (SDValue Val = performAddCSelIntoCSinc(N, DAG))
+    return Val;
   return performAddSubLongCombine(N, DCI, DAG);
 }
 
diff --git a/llvm/test/CodeGen/AArch64/aarch64-isel-csinc-type.ll b/llvm/test/CodeGen/AArch64/aarch64-isel-csinc-type.ll
index b1e99d5cbb10c..5ae11339dafbf 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-isel-csinc-type.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-isel-csinc-type.ll
@@ -11,9 +11,8 @@ define dso_local i8 @csinc1(i8 %a, i8 %b) local_unnamed_addr #0 {
 ; CHECK-LABEL: csinc1:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    tst w0, #0xff
-; CHECK-NEXT:    mov w8, #3
-; CHECK-NEXT:    csinc w8, w8, wzr, ne
-; CHECK-NEXT:    add w0, w8, w1
+; CHECK-NEXT:    add w8, w1, #3
+; CHECK-NEXT:    csinc w0, w8, w1, ne
 ; CHECK-NEXT:    ret
 entry:
   %tobool.not = icmp eq i8 %a, 0
@@ -27,9 +26,8 @@ define dso_local i16 @csinc2(i16 %a, i16 %b) local_unnamed_addr #0 {
 ; CHECK-LABEL: csinc2:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    tst w0, #0xffff
-; CHECK-NEXT:    mov w8, #3
-; CHECK-NEXT:    csinc w8, w8, wzr, ne
-; CHECK-NEXT:    add w0, w8, w1
+; CHECK-NEXT:    add w8, w1, #3
+; CHECK-NEXT:    csinc w0, w8, w1, ne
 ; CHECK-NEXT:    ret
 entry:
   %tobool.not = icmp eq i16 %a, 0
@@ -43,9 +41,8 @@ define dso_local i32 @csinc3(i32 %a, i32 %b) local_unnamed_addr #0 {
 ; CHECK-LABEL: csinc3:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    mov w8, #3
-; CHECK-NEXT:    csinc w8, w8, wzr, ne
-; CHECK-NEXT:    add w0, w8, w1
+; CHECK-NEXT:    add w8, w1, #3
+; CHECK-NEXT:    csinc w0, w8, w1, ne
 ; CHECK-NEXT:    ret
 entry:
   %tobool.not = icmp eq i32 %a, 0
@@ -59,9 +56,8 @@ define dso_local i64 @csinc4(i64 %a, i64 %b) local_unnamed_addr #0 {
 ; CHECK-LABEL: csinc4:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    cmp x0, #0
-; CHECK-NEXT:    mov w8, #3
-; CHECK-NEXT:    csinc x8, x8, xzr, ne
-; CHECK-NEXT:    add x0, x8, x1
+; CHECK-NEXT:    add x8, x1, #3
+; CHECK-NEXT:    csinc x0, x8, x1, ne
 ; CHECK-NEXT:    ret
 entry:
   %tobool.not = icmp eq i64 %a, 0
diff --git a/llvm/test/CodeGen/AArch64/aarch64-isel-csinc.ll b/llvm/test/CodeGen/AArch64/aarch64-isel-csinc.ll
index 6618bf038f9c1..9ecc6f5d4aceb 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-isel-csinc.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-isel-csinc.ll
@@ -11,9 +11,8 @@ define dso_local i32 @csinc1(i32 %a, i32 %b) {
 ; CHECK-LABEL: csinc1:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    mov w8, #3
-; CHECK-NEXT:    csinc w8, w8, wzr, eq
-; CHECK-NEXT:    add w0, w8, w1
+; CHECK-NEXT:    add w8, w1, #3
+; CHECK-NEXT:    csinc w0, w8, w1, eq
 ; CHECK-NEXT:    ret
 entry:
   %tobool.not = icmp eq i32 %a, 0
@@ -27,9 +26,8 @@ define dso_local i32 @csinc2(i32 %a, i32 %b) {
 ; CHECK-LABEL: csinc2:
 ; CHECK:       // %bb.0: // %entry
 ; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    mov w8, #3
-; CHECK-NEXT:    csinc w8, w8, wzr, ne
-; CHECK-NEXT:    add w0, w8, w1
+; CHECK-NEXT:    add w8, w1, #3
+; CHECK-NEXT:    csinc w0, w8, w1, ne
 ; CHECK-NEXT:    ret
 entry:
   %tobool.not = icmp eq i32 %a, 0
@@ -42,10 +40,9 @@ entry:
 define dso_local i32 @csinc3(i32 %a, i32 %b) {
 ; CHECK-LABEL: csinc3:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sub w8, w1, #3
 ; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    mov w8, #-3
-; CHECK-NEXT:    csinc w8, w8, wzr, ne
-; CHECK-NEXT:    add w0, w8, w1
+; CHECK-NEXT:    csinc w0, w8, w1, ne
 ; CHECK-NEXT:    ret
 entry:
   %tobool.not = icmp eq i32 %a, 0
@@ -58,14 +55,60 @@ entry:
 define dso_local i32 @csinc4(i32 %a, i32 %b) {
 ; CHECK-LABEL: csinc4:
 ; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sub w8, w1, #3
 ; CHECK-NEXT:    cmp w0, #0
-; CHECK-NEXT:    mov w8, #-3
+; CHECK-NEXT:    csinc w0, w8, w1, eq
+; CHECK-NEXT:    ret
+entry:
+  %tobool.not = icmp eq i32 %a, 0
+  %cond.v = select i1 %tobool.not, i32 -3, i32 1
+  %cond = add nsw i32 %cond.v, %b
+  ret i32 %cond
+}
+
+; int csinc5 (int a, int b) { return a ? b+1 : b-4095; }
+define dso_local i32 @csinc5(i32 %a, i32 %b) {
+; CHECK-LABEL: csinc5:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sub w8, w1, #4095
+; CHECK-NEXT:    cmp w0, #0
+; CHECK-NEXT:    csinc w0, w8, w1, eq
+; CHECK-NEXT:    ret
+entry:
+  %tobool.not = icmp eq i32 %a, 0
+  %cond.v = select i1 %tobool.not, i32 -4095, i32 1
+  %cond = add nsw i32 %cond.v, %b
+  ret i32 %cond
+}
+
+; int csinc6 (int a, int b) { return a ? b+1 : b-4096; }
+define dso_local i32 @csinc6(i32 %a, i32 %b) {
+; CHECK-LABEL: csinc6:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sub w8, w1, #1, lsl #12 // =4096
+; CHECK-NEXT:    cmp w0, #0
+; CHECK-NEXT:    csinc w0, w8, w1, eq
+; CHECK-NEXT:    ret
+entry:
+  %tobool.not = icmp eq i32 %a, 0
+  %cond.v = select i1 %tobool.not, i32 -4096, i32 1
+  %cond = add nsw i32 %cond.v, %b
+  ret i32 %cond
+}
+
+; prevent larger constants (the add laid after csinc)
+; int csinc7 (int a, int b) { return a ? b+1 : b-4097; }
+define dso_local i32 @csinc7(i32 %a, i32 %b) {
+; CHECK-LABEL: csinc7:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    cmp w0, #0
+; CHECK-NEXT:    mov w8, #-4097
 ; CHECK-NEXT:    csinc w8, w8, wzr, eq
 ; CHECK-NEXT:    add w0, w8, w1
 ; CHECK-NEXT:    ret
 entry:
   %tobool.not = icmp eq i32 %a, 0
-  %cond.v = select i1 %tobool.not, i32 -3, i32 1
+  %cond.v = select i1 %tobool.not, i32 -4097, i32 1
   %cond = add nsw i32 %cond.v, %b
   ret i32 %cond
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-csel.ll b/llvm/test/CodeGen/AArch64/arm64-csel.ll
index 72ad42beb7734..35fcc88d908b2 100644
--- a/llvm/test/CodeGen/AArch64/arm64-csel.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-csel.ll
@@ -328,11 +328,11 @@ define i64 @foo23(i64 %x) {
 define i16 @foo24(i8* nocapture readonly %A, i8* nocapture readonly %B) {
 ; CHECK-LABEL: foo24:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    ldrb w8, [x1]
-; CHECK-NEXT:    ldrb w9, [x0]
-; CHECK-NEXT:    cmp w8, #33
+; CHECK-NEXT:    ldrb w8, [x0]
+; CHECK-NEXT:    ldrb w9, [x1]
+; CHECK-NEXT:    cmp w8, #3
 ; CHECK-NEXT:    cset w8, hi
-; CHECK-NEXT:    cmp w9, #33
+; CHECK-NEXT:    cmp w9, #33
 ; CHECK-NEXT:    cinc w0, w8, hi
 ; CHECK-NEXT:    ret
 entry: