From ffc5db4455d518583d5602035ef040691a7f6358 Mon Sep 17 00:00:00 2001 From: Sergei Barannikov Date: Sat, 15 Nov 2025 18:48:42 +0300 Subject: [PATCH] [ARM] Pattern match Low Overhead Loops pseudos (NFCI) --- llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp | 36 ------------------------- llvm/lib/Target/ARM/ARMInstrThumb2.td | 31 ++++++++++++++++----- 2 files changed, 24 insertions(+), 43 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index 847b7af5a9b11..26b5e5a22386e 100644 --- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -3965,31 +3965,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) { return; // Other cases are autogenerated. break; - case ARMISD::WLSSETUP: { - SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopSetup, dl, MVT::i32, - N->getOperand(0)); - ReplaceUses(N, New); - CurDAG->RemoveDeadNode(N); - return; - } - case ARMISD::WLS: { - SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopStart, dl, MVT::Other, - N->getOperand(1), N->getOperand(2), - N->getOperand(0)); - ReplaceUses(N, New); - CurDAG->RemoveDeadNode(N); - return; - } - case ARMISD::LE: { - SDValue Ops[] = { N->getOperand(1), - N->getOperand(2), - N->getOperand(0) }; - unsigned Opc = ARM::t2LoopEnd; - SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); - ReplaceUses(N, New); - CurDAG->RemoveDeadNode(N); - return; - } case ARMISD::LDRD: { if (Subtarget->isThumb2()) break; // TableGen handles isel in this case. @@ -4043,17 +4018,6 @@ void ARMDAGToDAGISel::Select(SDNode *N) { CurDAG->RemoveDeadNode(N); return; } - case ARMISD::LOOP_DEC: { - SDValue Ops[] = { N->getOperand(1), - N->getOperand(2), - N->getOperand(0) }; - SDNode *Dec = - CurDAG->getMachineNode(ARM::t2LoopDec, dl, - CurDAG->getVTList(MVT::i32, MVT::Other), Ops); - ReplaceUses(N, Dec); - CurDAG->RemoveDeadNode(N); - return; - } case ARMISD::BRCOND: { // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc) diff --git a/llvm/lib/Target/ARM/ARMInstrThumb2.td b/llvm/lib/Target/ARM/ARMInstrThumb2.td index c229c8e4491df..911d7ebfba141 100644 --- a/llvm/lib/Target/ARM/ARMInstrThumb2.td +++ b/llvm/lib/Target/ARM/ARMInstrThumb2.td @@ -5581,6 +5581,20 @@ class t2LOL let Predicates = [IsThumb2, HasV8_1MMainline, HasLOB]; } +def arm_wlssetup + : SDNode<"ARMISD::WLSSETUP", + SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisSameAs<1, 0>]>>; + +def arm_wls : SDNode<"ARMISD::WLS", + SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>, + [SDNPHasChain]>; + +def arm_loop_dec : SDNode<"ARMISD::LOOP_DEC", SDTIntBinOp, [SDNPHasChain]>; + +def arm_le : SDNode<"ARMISD::LE", + SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>, + [SDNPHasChain]>; + let isNotDuplicable = 1 in { def t2WLS : t2LOL<(outs GPRlr:$LR), (ins rGPR:$Rn, wlslabel_u11:$label), @@ -5651,15 +5665,17 @@ def t2DoLoopStartTP : // valid after reg alloc, as it should be lowered during MVETPAndVPTOptimisations // into a t2WhileLoopStartLR (or expanded). def t2WhileLoopSetup : - t2PseudoInst<(outs GPRlr:$lr), (ins rGPR:$tc), 4, IIC_Br, []>; + t2PseudoInst<(outs GPRlr:$lr), (ins rGPR:$tc), 4, IIC_Br, + [(set i32:$lr, (arm_wlssetup i32:$tc))]>; // A pseudo to represent the decrement in a low overhead loop. A t2LoopDec and // t2LoopEnd together represent a LE instruction. Ideally these are converted // to a t2LoopEndDec which is lowered as a single instruction. let hasSideEffects = 0 in def t2LoopDec : - t2PseudoInst<(outs GPRlr:$Rm), (ins GPRlr:$Rn, imm0_7:$size), - 4, IIC_Br, []>, Sched<[WriteBr]>; + t2PseudoInst<(outs GPRlr:$Rm), (ins GPRlr:$Rn, imm0_7:$size), 4, IIC_Br, + [(set i32:$Rm, (arm_loop_dec i32:$Rn, timm:$size))]>, + Sched<[WriteBr]>; let isBranch = 1, isTerminator = 1, hasSideEffects = 1, Defs = [CPSR] in { // The branch in a t2WhileLoopSetup/t2WhileLoopStart pair, eventually turned @@ -5667,8 +5683,8 @@ let isBranch = 1, isTerminator = 1, hasSideEffects = 1, Defs = [CPSR] in { def t2WhileLoopStart : t2PseudoInst<(outs), (ins GPRlr:$tc, brtarget:$target), - 4, IIC_Br, []>, - Sched<[WriteBr]>; + 4, IIC_Br, [(arm_wls i32:$tc, bb:$target)]>, + Sched<[WriteBr]>; // WhileLoopStartLR that sets up LR and branches on zero, equivalent to WLS. It // is lowered in the ARMLowOverheadLoops pass providing the branches are within @@ -5690,8 +5706,9 @@ def t2WhileLoopStartTP : // t2LoopEnd - the branch half of a t2LoopDec/t2LoopEnd pair. def t2LoopEnd : - t2PseudoInst<(outs), (ins GPRlr:$tc, brtarget:$target), - 8, IIC_Br, []>, Sched<[WriteBr]>; + t2PseudoInst<(outs), (ins GPRlr:$tc, brtarget:$target), + 8, IIC_Br, [(arm_le i32:$tc, bb:$target)]>, + Sched<[WriteBr]>; // The combination of a t2LoopDec and t2LoopEnd, performing both the LR // decrement and branch as a single instruction. Is lowered to a LE or