-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[ARM] Lower abds and abdu on ARM #158734
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
[ARM] Lower abds and abdu on ARM #158734
Conversation
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-llvm-analysis Author: AZero13 (AZero13) Changes: For maximum effect, there are plenty of PRs stacked on here, many of which are linked elsewhere, but I have no idea how to do this in the right order. Patch is 214.50 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/158734.diff 17 Files Affected:
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 5c35b3327c16d..1556b63852280 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -2790,25 +2790,40 @@ static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) {
[[fallthrough]];
case ARM::RSBrr:
case ARM::RSBri:
+ case ARM::RSBrsi:
+ case ARM::RSBrsr:
case ARM::RSCrr:
case ARM::RSCri:
+ case ARM::RSCrsr:
+ case ARM::RSCrsi:
case ARM::ADDrr:
case ARM::ADDri:
+ case ARM::ADDrsi:
+ case ARM::ADDrsr:
case ARM::ADCrr:
case ARM::ADCri:
case ARM::SUBrr:
case ARM::SUBri:
+ case ARM::SUBrsr:
+ case ARM::SUBrsi:
case ARM::SBCrr:
case ARM::SBCri:
+ case ARM::SBCrsi:
+ case ARM::SBCrsr:
case ARM::t2RSBri:
+ case ARM::t2RSBrr:
+ case ARM::t2RSBrs:
case ARM::t2ADDrr:
case ARM::t2ADDri:
+ case ARM::t2ADDrs:
case ARM::t2ADCrr:
case ARM::t2ADCri:
case ARM::t2SUBrr:
case ARM::t2SUBri:
+ case ARM::t2SUBrs:
case ARM::t2SBCrr:
case ARM::t2SBCri:
+ case ARM::t2SBCrs:
case ARM::ANDrr:
case ARM::ANDri:
case ARM::ANDrsr:
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 9ad46df159c20..c22cdb6711516 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -182,7 +182,10 @@ class ARMDAGToDAGISel : public SelectionDAGISel {
return ARM_AM::getT2SOImmVal(~Imm) != -1;
}
- // Include the pieces autogenerated from the target description.
+ // Preference helper: for SUB with encodable immediate LHS, select RSBri
+ // and materialize any RHS shift first when needed. Returns true if handled.
+bool tryPreferRSBForSUB(SDNode *N);
+
#include "ARMGenDAGISel.inc"
private:
@@ -306,9 +309,6 @@ class ARMDAGToDAGISel : public SelectionDAGISel {
bool tryInsertVectorElt(SDNode *N);
- // Select special operations if node forms integer ABS pattern
- bool tryABSOp(SDNode *N);
-
bool tryReadRegister(SDNode *N);
bool tryWriteRegister(SDNode *N);
@@ -3459,45 +3459,6 @@ bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
return false;
}
-/// Target-specific DAG combining for ISD::SUB.
-/// Target-independent combining lowers SELECT_CC nodes of the form
-/// select_cc setg[ge] X, 0, X, -X
-/// select_cc setgt X, -1, X, -X
-/// select_cc setl[te] X, 0, -X, X
-/// select_cc setlt X, 1, -X, X
-/// which represent Integer ABS into:
-/// Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
-/// ARM instruction selection detects the latter and matches it to
-/// ARM::ABS or ARM::t2ABS machine node.
-bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
- SDValue SUBSrc0 = N->getOperand(0);
- SDValue SUBSrc1 = N->getOperand(1);
- EVT VT = N->getValueType(0);
-
- if (Subtarget->isThumb1Only())
- return false;
-
- if (SUBSrc0.getOpcode() != ISD::XOR || SUBSrc1.getOpcode() != ISD::SRA)
- return false;
-
- SDValue XORSrc0 = SUBSrc0.getOperand(0);
- SDValue XORSrc1 = SUBSrc0.getOperand(1);
- SDValue SRASrc0 = SUBSrc1.getOperand(0);
- SDValue SRASrc1 = SUBSrc1.getOperand(1);
- ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
- EVT XType = SRASrc0.getValueType();
- unsigned Size = XType.getSizeInBits() - 1;
-
- if (XORSrc1 == SUBSrc1 && XORSrc0 == SRASrc0 && XType.isInteger() &&
- SRAConstant != nullptr && Size == SRAConstant->getZExtValue()) {
- unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
- CurDAG->SelectNodeTo(N, Opcode, VT, XORSrc0);
- return true;
- }
-
- return false;
-}
-
/// We've got special pseudo-instructions for these
void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
unsigned Opcode;
@@ -3534,6 +3495,81 @@ getContiguousRangeOfSetBits(const APInt &A) {
return std::make_pair(FirstOne, LastOne);
}
+bool ARMDAGToDAGISel::tryPreferRSBForSUB(SDNode *N) {
+ if (Subtarget->isThumb1Only())
+ return false;
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ const auto *CI = dyn_cast<ConstantSDNode>(LHS);
+ if (!CI)
+ return false;
+
+ unsigned Imm = (unsigned)CI->getZExtValue();
+ bool Encodable = Subtarget->isThumb() ? is_t2_so_imm(Imm) : is_so_imm(Imm);
+ if (!Encodable)
+ return false;
+
+ SDLoc dl(N);
+ SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
+ SDValue ImmOp = CurDAG->getTargetConstant(Imm, dl, MVT::i32);
+
+ // Materialize shift if RHS is shifted
+ ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(RHS.getOpcode());
+ SDValue Rn = RHS;
+ if (ShOpcVal != ARM_AM::no_shift) {
+ const ConstantSDNode *ShC = dyn_cast<ConstantSDNode>(RHS.getOperand(1));
+ if (!ShC)
+ return false; // can't safely materialize variable shift here
+ unsigned ShAmt = ShC->getZExtValue();
+ if (Subtarget->isThumb()) {
+ unsigned ShOpc = 0;
+ switch (ShOpcVal) {
+ default:
+ ShOpc = 0;
+ break;
+ case ARM_AM::lsl:
+ ShOpc = ARM::t2LSLri;
+ break;
+ case ARM_AM::lsr:
+ ShOpc = ARM::t2LSRri;
+ break;
+ case ARM_AM::asr:
+ ShOpc = ARM::t2ASRri;
+ break;
+ case ARM_AM::ror:
+ ShOpc = ARM::t2RORri;
+ break;
+ }
+ if (!ShOpc)
+ return false;
+ SDValue ShAmtOp = CurDAG->getTargetConstant(ShAmt, dl, MVT::i32);
+ SDValue OpsShift[] = {RHS.getOperand(0), ShAmtOp, getAL(CurDAG, dl), Reg0,
+ Reg0};
+ MachineSDNode *ShN =
+ CurDAG->getMachineNode(ShOpc, dl, MVT::i32, OpsShift);
+ Rn = SDValue(ShN, 0);
+ } else {
+ unsigned SOpc = ARM_AM::getSORegOpc(ShOpcVal, ShAmt);
+ SDValue ShImmOp = CurDAG->getTargetConstant(SOpc, dl, MVT::i32);
+ SDValue OpsShift[] = {RHS.getOperand(0), ShImmOp, getAL(CurDAG, dl), Reg0,
+ Reg0};
+ MachineSDNode *ShN =
+ CurDAG->getMachineNode(ARM::MOVsi, dl, MVT::i32, OpsShift);
+ Rn = SDValue(ShN, 0);
+ }
+ }
+
+ if (Subtarget->isThumb()) {
+ SDValue Ops[] = {Rn, ImmOp, getAL(CurDAG, dl), Reg0, Reg0};
+ CurDAG->SelectNodeTo(N, ARM::t2RSBri, MVT::i32, Ops);
+ } else {
+ SDValue Ops[] = {Rn, ImmOp, getAL(CurDAG, dl), Reg0, Reg0};
+ CurDAG->SelectNodeTo(N, ARM::RSBri, MVT::i32, Ops);
+ }
+ return true;
+}
+
void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
assert(N->getOpcode() == ARMISD::CMPZ);
SwitchEQNEToPLMI = false;
@@ -3634,6 +3670,9 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
switch (N->getOpcode()) {
default: break;
+ case ISD::SUB:
+ if (tryPreferRSBForSUB(N)) return;
+ break;
case ISD::STORE: {
// For Thumb1, match an sp-relative store in C++. This is a little
// unfortunate, but I don't think I can make the chain check work
@@ -3685,12 +3724,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
if (tryInlineAsm(N))
return;
break;
- case ISD::SUB:
- // Select special operations if SUB node forms integer ABS pattern
- if (tryABSOp(N))
- return;
- // Other cases are autogenerated.
- break;
case ISD::Constant: {
unsigned Val = N->getAsZExtVal();
// If we can't materialize the constant we need to use a literal pool
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 4af2721562d7c..3e75e6b81fe68 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -646,6 +646,14 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
if (!Subtarget->hasV8_1MMainlineOps())
setOperationAction(ISD::UCMP, MVT::i32, Custom);
+
+ // Custom-lower ABDU on all subtargets (LowerABD has a dedicated Thumb1 sequence); ABDS is Custom only when the subtarget is not Thumb1-only.
+ if (!Subtarget->isThumb1Only())
+ setOperationAction(ISD::ABDS, MVT::i32, Custom);
+ setOperationAction(ISD::ABDU, MVT::i32, Custom);
+
+ if (!Subtarget->isThumb1Only())
+ setOperationAction(ISD::ABS, MVT::i32, Custom);
setOperationAction(ISD::ConstantFP, MVT::f32, Custom);
setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
@@ -5090,6 +5098,25 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
}
}
+ // Canonicalise absolute difference patterns in SELECT before converting to SELECT_CC:
+ // select(setcc LHS, RHS, cc), sub(LHS, RHS), sub(RHS, LHS) ->
+ // select(setcc LHS, RHS, cc), sub(LHS, RHS), neg(sub(LHS, RHS))
+ if (Cond.getOpcode() == ISD::SETCC &&
+ SelectTrue.getOpcode() == ISD::SUB && SelectFalse.getOpcode() == ISD::SUB) {
+ SDValue LHS = Cond.getOperand(0);
+ SDValue RHS = Cond.getOperand(1);
+
+ if (SelectTrue.getOperand(0) == LHS && SelectTrue.getOperand(1) == RHS &&
+ SelectFalse.getOperand(0) == RHS && SelectFalse.getOperand(1) == LHS) {
+ SelectTrue->dropFlags(SDNodeFlags::PoisonGeneratingFlags);
+ SelectFalse = DAG.getNegative(SelectTrue, dl, SelectTrue.getValueType());
+ } else if (SelectTrue.getOperand(0) == RHS && SelectTrue.getOperand(1) == LHS &&
+ SelectFalse.getOperand(0) == LHS && SelectFalse.getOperand(1) == RHS) {
+ SelectFalse->dropFlags(SDNodeFlags::PoisonGeneratingFlags);
+ SelectTrue = DAG.getNegative(SelectFalse, dl, SelectFalse.getValueType());
+ }
+ }
+
// ARM's BooleanContents value is UndefinedBooleanContent. Mask out the
// undefined bits before doing a full-word comparison with zero.
Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
@@ -5380,6 +5407,28 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::AND, dl, VT, LHS, Shift);
}
+
+
+ // Canonicalise absolute difference patterns:
+ // select_cc LHS, RHS, sub(LHS, RHS), sub(RHS, LHS), cc ->
+ // select_cc LHS, RHS, sub(LHS, RHS), neg(sub(LHS, RHS)), cc
+ //
+ // select_cc LHS, RHS, sub(RHS, LHS), sub(LHS, RHS), cc ->
+ // select_cc LHS, RHS, neg(sub(LHS, RHS)), sub(LHS, RHS), cc
+ // The rewritten forms (after the arrows) can be matched into subs+cmov with negation.
+ // NOTE: Drop poison generating flags from the negated operand to avoid
+ // inadvertently propagating poison after the canonicalisation.
+ if (TrueVal.getOpcode() == ISD::SUB && FalseVal.getOpcode() == ISD::SUB) {
+ if (TrueVal.getOperand(0) == LHS && TrueVal.getOperand(1) == RHS &&
+ FalseVal.getOperand(0) == RHS && FalseVal.getOperand(1) == LHS) {
+ TrueVal->dropFlags(SDNodeFlags::PoisonGeneratingFlags);
+ FalseVal = DAG.getNegative(TrueVal, dl, TrueVal.getValueType());
+ } else if (TrueVal.getOperand(0) == RHS && TrueVal.getOperand(1) == LHS &&
+ FalseVal.getOperand(0) == LHS && FalseVal.getOperand(1) == RHS) {
+ FalseVal->dropFlags(SDNodeFlags::PoisonGeneratingFlags);
+ TrueVal = DAG.getNegative(FalseVal, dl, FalseVal.getValueType());
+ }
+ }
}
if (Subtarget->hasV8_1MMainlineOps() && CFVal && CTVal &&
@@ -5506,6 +5555,60 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
return Result;
}
+SDValue ARMTargetLowering::LowerABD(SDValue Op, SelectionDAG &DAG) const {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+
+ // If the subtract doesn't overflow then just use abs(sub())
+ bool IsNonNegative = DAG.SignBitIsZero(LHS) && DAG.SignBitIsZero(RHS);
+ bool IsSigned = Op.getOpcode() == ISD::ABDS;
+ if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, LHS, RHS))
+ return SDValue();
+
+ if (DAG.willNotOverflowSub(IsSigned || IsNonNegative, RHS, LHS))
+ return SDValue();
+
+ if (Subtarget->isThumb1Only()) {
+ assert(!IsSigned && "Signed ABS not supported on Thumb1");
+ // abdu: subs; sbcs r1,r1,r1(mask from borrow); eors; subs
+
+ // First subtraction: LHS - RHS
+ SDValue Sub1WithFlags =
+ DAG.getNode(ARMISD::SUBC, DL, DAG.getVTList(VT, FlagsVT), LHS, RHS);
+ SDValue Sub1Result = Sub1WithFlags.getValue(0);
+ SDValue Flags1 = Sub1WithFlags.getValue(1);
+
+ // sbcs r1,r1,r1 (mask from borrow)
+ SDValue Sbc1 = DAG.getNode(ARMISD::SUBE, DL, DAG.getVTList(VT, FlagsVT),
+ RHS, RHS, Flags1);
+
+ // eors (XOR)
+ SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, Sub1Result, Sbc1.getValue(0));
+
+ // subs (final subtraction)
+ return DAG.getNode(ISD::SUB, DL, VT, Xor, Sbc1.getValue(0));
+ }
+
+ // Generate SUBS and CSEL for absolute difference (like LowerABS)
+ // Compute a - b with flags
+ SDValue Cmp =
+ DAG.getNode(ARMISD::SUBC, DL, DAG.getVTList(MVT::i32, FlagsVT), LHS, RHS);
+
+ // Compute b - a (negative of a - b)
+ SDValue Neg = DAG.getNode(ISD::SUB, DL, MVT::i32,
+ DAG.getConstant(0, DL, MVT::i32), Cmp.getValue(0));
+
+ // For unsigned: use LO (a < b) to select b-a, otherwise keep a-b
+ // For signed: use LT (a < b) to select b-a, otherwise keep a-b
+ ARMCC::CondCodes CC = IsSigned ? ARMCC::LT : ARMCC::LO;
+
+ // CMOV: if a >= b, keep a-b; otherwise (a < b) select b-a
+ return DAG.getNode(ARMISD::CMOV, DL, MVT::i32, Cmp.getValue(0), Neg,
+ DAG.getConstant(CC, DL, MVT::i32), Cmp.getValue(1));
+}
+
/// canChangeToInt - Given the fp compare operand, return true if it is suitable
/// to morph to an integer compare sequence.
static bool canChangeToInt(SDValue Op, bool &SeenZero,
@@ -5621,6 +5724,19 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
+// Generate CMP + CMOV for integer abs.
+SDValue ARMTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+
+ SDValue Neg = DAG.getNegative(Op.getOperand(0), DL, MVT::i32);
+
+ // Generate CMP & CMOV.
+ SDValue Cmp = DAG.getNode(ARMISD::CMP, DL, FlagsVT, Op.getOperand(0),
+ DAG.getConstant(0, DL, MVT::i32));
+ return DAG.getNode(ARMISD::CMOV, DL, MVT::i32, Op.getOperand(0), Neg,
+ DAG.getConstant(ARMCC::MI, DL, MVT::i32), Cmp);
+}
+
SDValue ARMTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
SDValue Cond = Op.getOperand(1);
@@ -10583,6 +10699,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::ABDS:
+ case ISD::ABDU: return LowerABD(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
case ISD::BR_CC: return LowerBR_CC(Op, DAG);
case ISD::BR_JT: return LowerBR_JT(Op, DAG);
@@ -10703,6 +10821,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::UCMP:
case ISD::SCMP:
return LowerCMP(Op, DAG);
+ case ISD::ABS:
+ return LowerABS(Op, DAG);
}
}
@@ -12288,89 +12408,6 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case ARM::Int_eh_sjlj_setup_dispatch:
EmitSjLjDispatchBlock(MI, BB);
return BB;
-
- case ARM::ABS:
- case ARM::t2ABS: {
- // To insert an ABS instruction, we have to insert the
- // diamond control-flow pattern. The incoming instruction knows the
- // source vreg to test against 0, the destination vreg to set,
- // the condition code register to branch on, the
- // true/false values to select between, and a branch opcode to use.
- // It transforms
- // V1 = ABS V0
- // into
- // V2 = MOVS V0
- // BCC (branch to SinkBB if V0 >= 0)
- // RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0)
- // SinkBB: V1 = PHI(V2, V3)
- const BasicBlock *LLVM_BB = BB->getBasicBlock();
- MachineFunction::iterator BBI = ++BB->getIterator();
- MachineFunction *Fn = BB->getParent();
- MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB);
- MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB);
- Fn->insert(BBI, RSBBB);
- Fn->insert(BBI, SinkBB);
-
- // Set the call frame size on entry to the new basic blocks.
- unsigned CallFrameSize = TII->getCallFrameSizeAt(MI);
- RSBBB->setCallFrameSize(CallFrameSize);
- SinkBB->setCallFrameSize(CallFrameSize);
-
- Register ABSSrcReg = MI.getOperand(1).getReg();
- Register ABSDstReg = MI.getOperand(0).getReg();
- bool ABSSrcKIll = MI.getOperand(1).isKill();
- bool isThumb2 = Subtarget->isThumb2();
- MachineRegisterInfo &MRI = Fn->getRegInfo();
- // In Thumb mode S must not be specified if source register is the SP or
- // PC and if destination register is the SP, so restrict register class
- Register NewRsbDstReg = MRI.createVirtualRegister(
- isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass);
-
- // Transfer the remainder of BB and its successor edges to sinkMBB.
- SinkBB->splice(SinkBB->begin(), BB,
- std::next(MachineBasicBlock::iterator(MI)), BB->end());
- SinkBB->transferSuccessorsAndUpdatePHIs(BB);
-
- BB->addSuccessor(RSBBB);
- BB->addSuccessor(SinkBB);
-
- // fall through to SinkMBB
- RSBBB->addSuccessor(SinkBB);
-
- // insert a cmp at the end of BB
- BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
- .addReg(ABSSrcReg)
- .addImm(0)
- .add(predOps(ARMCC::AL));
-
- // insert a bcc with opposite CC to ARMCC::MI at the end of BB
- BuildMI(BB, dl,
- TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB)
- .addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR);
-
- // insert rsbri in RSBBB
- // Note: BCC and rsbri will be converted into predicated rsbmi
- // by if-conversion pass
- BuildMI(*RSBBB, RSBBB->begin(), dl,
- TII->get(isThumb2 ? ARM::t2RSBri : ARM::RSBri), NewRsbDstReg)
- .addReg(ABSSrcReg, ABSSrcKIll ? RegState::Kill : 0)
- .addImm(0)
- .add(predOps(ARMCC::AL))
- .add(condCodeOp());
-
- // insert PHI in SinkBB,
- // reuse ABSDstReg to not change uses of ABS instruction
- BuildMI(*SinkBB, SinkBB->begin(), dl,
- TII->get(ARM::PHI), ABSDstReg)
- .addReg(NewRsbDstReg).addMBB(RSBBB)
- .addReg(ABSSrcReg).addMBB(BB);
-
- // remove ABS instruction
- MI.eraseFromParent();
-
- // return last added BB
- return SinkBB;
- }
case ARM::COPY_STRUCT_BYVAL_I32:
++NumLoopByVals;
return EmitStructByval(MI, BB);
@@ -14082,6 +14119,41 @@ static SDValue PerformSubCSINCCombine(SDNode *N, SelectionDAG &DAG) {
CSINC.getOperand(3));
}
+static bool isNegatedInteger(SDValue Op) {
+ return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0));
+}
+
+// Try to fold
+//
+// (neg (cmov X, Y)) -> (cmov (neg X), (neg Y))
+//
+// The folding helps cmov to be matched with csneg without generating
+// redundant neg instruction.
+static SDValue performNegCMovCombine(SDNode *N, SelectionDAG &DAG) {
+ if (!isNegatedInteger(SDValue(N, 0)))
+ return SDValue();
+
+ SDValue CMov = N->getOperand(1);
+ if (CMov.getOpcode() != ARMISD::CMOV || !CMov->hasOneUse())
+ return SDValue();
+
+ SDValue N0 = CMov.getOperand(0);
+ SDValue N1 = CMov.getOperand(1);
+
+ // If neither of them is a negation, the fold is not worthwhile, as it
+ // introduces two additional negations while removing only one.
+ if (!isNegatedInteger(N0) && !isNegatedInteger(N1))
+ return SDValue();
+
+ SDLoc DL(N);
+ EVT VT = CMov.getValueType();
+
+ SDValue N0N = DAG.getNegative(N0, DL, VT);
+ SDValue N1N = DAG.getNegative(N1, DL, VT);
+ return DAG.getNode(ARMISD::CMOV, DL, VT, N0N, N1N, CMov.getOperand(2),
+ CMov.getOperand(3));
+}
+
/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
///
static SDValue PerformSUBCombine(SDNode *N,
@@ -14098,6 +14170,9 @@ static SDValue PerformSUBCombine(SDNode *N,
if (SDValue R = PerformSubCSINCCombine(N, DCI.DAG))
return R;
+ if (SDValue Val = performNegCMovCombine(N, DCI.DAG))
+ return Val;
+
if (!Subtarget->hasMVEIntegerOps() || !N->getValueType(0).isVector())
return SDValue();
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index ccf6d509313b9..3b5bf3a3d9db2 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
55652af
to
bb88573
Compare
…didate I noticed some were missing when one of the IR I was working on did not optimize it. Co-Authored-By: Eli Friedman <efriedma@quicinc.com>
For maximum effect, there are plenty of PRs stacked on here, many of which are linked elsewhere, but I have no idea how to do this in the right order.