diff --git a/llvm/lib/CodeGen/ShrinkWrap.cpp b/llvm/lib/CodeGen/ShrinkWrap.cpp index d732f45248cec..2411b1ad52031 100644 --- a/llvm/lib/CodeGen/ShrinkWrap.cpp +++ b/llvm/lib/CodeGen/ShrinkWrap.cpp @@ -97,9 +97,6 @@ STATISTIC(NumCandidatesDropped, static cl::opt EnableShrinkWrapOpt("enable-shrink-wrap", cl::Hidden, cl::desc("enable the shrink-wrapping pass")); -static cl::opt EnablePostShrinkWrapOpt( - "enable-shrink-wrap-region-split", cl::init(true), cl::Hidden, - cl::desc("enable splitting of the restore block if possible")); namespace { @@ -187,30 +184,6 @@ class ShrinkWrap : public MachineFunctionPass { /// this call. void updateSaveRestorePoints(MachineBasicBlock &MBB, RegScavenger *RS); - // Try to find safe point based on dominance and block frequency without - // any change in IR. - bool performShrinkWrapping(MachineFunction &MF, RegScavenger *RS); - - /// This function tries to split the restore point if doing so can shrink the - /// save point further. \return True if restore point is split. - bool postShrinkWrapping(bool HasCandidate, MachineFunction &MF, - RegScavenger *RS); - - /// This function analyzes if the restore point can split to create a new - /// restore point. This function collects - /// 1. Any preds of current restore that are reachable by callee save/FI - /// blocks - /// - indicated by DirtyPreds - /// 2. Any preds of current restore that are not DirtyPreds - indicated by - /// CleanPreds - /// Both sets should be non-empty for considering restore point split. - bool checkIfRestoreSplittable( - const MachineBasicBlock *CurRestore, - const DenseSet &ReachableByDirty, - SmallVectorImpl &DirtyPreds, - SmallVectorImpl &CleanPreds, - const TargetInstrInfo *TII, RegScavenger *RS); - /// Initialize the pass for \p MF. void init(MachineFunction &MF) { RCI.runOnMachineFunction(MF); @@ -347,303 +320,18 @@ bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI, /// Helper function to find the immediate (post) dominator. template static MachineBasicBlock *FindIDom(MachineBasicBlock &Block, ListOfBBs BBs, - DominanceAnalysis &Dom, bool Strict = true) { + DominanceAnalysis &Dom) { MachineBasicBlock *IDom = &Block; for (MachineBasicBlock *BB : BBs) { IDom = Dom.findNearestCommonDominator(IDom, BB); if (!IDom) break; } - if (Strict && IDom == &Block) + if (IDom == &Block) return nullptr; return IDom; } -static bool isAnalyzableBB(const TargetInstrInfo &TII, - MachineBasicBlock &Entry) { - // Check if the block is analyzable. - MachineBasicBlock *TBB = nullptr, *FBB = nullptr; - SmallVector Cond; - return !TII.analyzeBranch(Entry, TBB, FBB, Cond); -} - -/// Determines if any predecessor of MBB is on the path from block that has use -/// or def of CSRs/FI to MBB. -/// ReachableByDirty: All blocks reachable from block that has use or def of -/// CSR/FI. -static bool -hasDirtyPred(const DenseSet &ReachableByDirty, - const MachineBasicBlock &MBB) { - for (const MachineBasicBlock *PredBB : MBB.predecessors()) - if (ReachableByDirty.count(PredBB)) - return true; - return false; -} - -/// Derives the list of all the basic blocks reachable from MBB. -static void markAllReachable(DenseSet &Visited, - const MachineBasicBlock &MBB) { - SmallVector Worklist(MBB.succ_begin(), - MBB.succ_end()); - Visited.insert(&MBB); - while (!Worklist.empty()) { - MachineBasicBlock *SuccMBB = Worklist.pop_back_val(); - if (!Visited.insert(SuccMBB).second) - continue; - Worklist.append(SuccMBB->succ_begin(), SuccMBB->succ_end()); - } -} - -/// Collect blocks reachable by use or def of CSRs/FI. -static void collectBlocksReachableByDirty( - const DenseSet &DirtyBBs, - DenseSet &ReachableByDirty) { - for (const MachineBasicBlock *MBB : DirtyBBs) { - if (ReachableByDirty.count(MBB)) - continue; - // Mark all offsprings as reachable. - markAllReachable(ReachableByDirty, *MBB); - } -} - -/// \return true if there is a clean path from SavePoint to the original -/// Restore. -static bool -isSaveReachableThroughClean(const MachineBasicBlock *SavePoint, - ArrayRef CleanPreds) { - DenseSet Visited; - SmallVector Worklist(CleanPreds.begin(), - CleanPreds.end()); - while (!Worklist.empty()) { - MachineBasicBlock *CleanBB = Worklist.pop_back_val(); - if (CleanBB == SavePoint) - return true; - if (!Visited.insert(CleanBB).second || !CleanBB->pred_size()) - continue; - Worklist.append(CleanBB->pred_begin(), CleanBB->pred_end()); - } - return false; -} - -/// This function updates the branches post restore point split. -/// -/// Restore point has been split. -/// Old restore point: MBB -/// New restore point: NMBB -/// Any basic block(say BBToUpdate) which had a fallthrough to MBB -/// previously should -/// 1. Fallthrough to NMBB iff NMBB is inserted immediately above MBB in the -/// block layout OR -/// 2. Branch unconditionally to NMBB iff NMBB is inserted at any other place. -static void updateTerminator(MachineBasicBlock *BBToUpdate, - MachineBasicBlock *NMBB, - const TargetInstrInfo *TII) { - DebugLoc DL = BBToUpdate->findBranchDebugLoc(); - // if NMBB isn't the new layout successor for BBToUpdate, insert unconditional - // branch to it - if (!BBToUpdate->isLayoutSuccessor(NMBB)) - TII->insertUnconditionalBranch(*BBToUpdate, NMBB, DL); -} - -/// This function splits the restore point and returns new restore point/BB. -/// -/// DirtyPreds: Predessors of \p MBB that are ReachableByDirty -/// -/// Decision has been made to split the restore point. -/// old restore point: \p MBB -/// new restore point: \p NMBB -/// This function makes the necessary block layout changes so that -/// 1. \p NMBB points to \p MBB unconditionally -/// 2. All dirtyPreds that previously pointed to \p MBB point to \p NMBB -static MachineBasicBlock * -tryToSplitRestore(MachineBasicBlock *MBB, - ArrayRef DirtyPreds, - const TargetInstrInfo *TII) { - MachineFunction *MF = MBB->getParent(); - - // get the list of DirtyPreds who have a fallthrough to MBB - // before the block layout change. This is just to ensure that if the NMBB is - // inserted after MBB, then we create unconditional branch from - // DirtyPred/CleanPred to NMBB - SmallPtrSet MBBFallthrough; - for (MachineBasicBlock *BB : DirtyPreds) - if (BB->getFallThrough(false) == MBB) - MBBFallthrough.insert(BB); - - MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock(); - MF->insert(MachineFunction::iterator(MBB), NMBB); - - for (const MachineBasicBlock::RegisterMaskPair &LI : MBB->liveins()) - NMBB->addLiveIn(LI.PhysReg); - - TII->insertUnconditionalBranch(*NMBB, MBB, DebugLoc()); - - // After splitting, all predecessors of the restore point should be dirty - // blocks. - for (MachineBasicBlock *SuccBB : DirtyPreds) - SuccBB->ReplaceUsesOfBlockWith(MBB, NMBB); - - NMBB->addSuccessor(MBB); - - for (MachineBasicBlock *BBToUpdate : MBBFallthrough) - updateTerminator(BBToUpdate, NMBB, TII); - - return NMBB; -} - -/// This function undoes the restore point split done earlier. -/// -/// DirtyPreds: All predecessors of \p NMBB that are ReachableByDirty. -/// -/// Restore point was split and the change needs to be unrolled. Make necessary -/// changes to reset restore point from \p NMBB to \p MBB. -static void rollbackRestoreSplit(MachineFunction &MF, MachineBasicBlock *NMBB, - MachineBasicBlock *MBB, - ArrayRef DirtyPreds, - const TargetInstrInfo *TII) { - // For a BB, if NMBB is fallthrough in the current layout, then in the new - // layout a. BB should fallthrough to MBB OR b. BB should undconditionally - // branch to MBB - SmallPtrSet NMBBFallthrough; - for (MachineBasicBlock *BB : DirtyPreds) - if (BB->getFallThrough(false) == NMBB) - NMBBFallthrough.insert(BB); - - NMBB->removeSuccessor(MBB); - for (MachineBasicBlock *SuccBB : DirtyPreds) - SuccBB->ReplaceUsesOfBlockWith(NMBB, MBB); - - NMBB->erase(NMBB->begin(), NMBB->end()); - NMBB->eraseFromParent(); - - for (MachineBasicBlock *BBToUpdate : NMBBFallthrough) - updateTerminator(BBToUpdate, MBB, TII); -} - -// A block is deemed fit for restore point split iff there exist -// 1. DirtyPreds - preds of CurRestore reachable from use or def of CSR/FI -// 2. CleanPreds - preds of CurRestore that arent DirtyPreds -bool ShrinkWrap::checkIfRestoreSplittable( - const MachineBasicBlock *CurRestore, - const DenseSet &ReachableByDirty, - SmallVectorImpl &DirtyPreds, - SmallVectorImpl &CleanPreds, - const TargetInstrInfo *TII, RegScavenger *RS) { - for (const MachineInstr &MI : *CurRestore) - if (useOrDefCSROrFI(MI, RS)) - return false; - - for (MachineBasicBlock *PredBB : CurRestore->predecessors()) { - if (!isAnalyzableBB(*TII, *PredBB)) - return false; - - if (ReachableByDirty.count(PredBB)) - DirtyPreds.push_back(PredBB); - else - CleanPreds.push_back(PredBB); - } - - return !(CleanPreds.empty() || DirtyPreds.empty()); -} - -bool ShrinkWrap::postShrinkWrapping(bool HasCandidate, MachineFunction &MF, - RegScavenger *RS) { - if (!EnablePostShrinkWrapOpt) - return false; - - MachineBasicBlock *InitSave = nullptr; - MachineBasicBlock *InitRestore = nullptr; - - if (HasCandidate) { - InitSave = Save; - InitRestore = Restore; - } else { - InitRestore = nullptr; - InitSave = &MF.front(); - for (MachineBasicBlock &MBB : MF) { - if (MBB.isEHFuncletEntry()) - return false; - if (MBB.isReturnBlock()) { - // Do not support multiple restore points. - if (InitRestore) - return false; - InitRestore = &MBB; - } - } - } - - if (!InitSave || !InitRestore || InitRestore == InitSave || - !MDT->dominates(InitSave, InitRestore) || - !MPDT->dominates(InitRestore, InitSave)) - return false; - - DenseSet DirtyBBs; - for (MachineBasicBlock &MBB : MF) { - if (MBB.isEHPad()) { - DirtyBBs.insert(&MBB); - continue; - } - for (const MachineInstr &MI : MBB) - if (useOrDefCSROrFI(MI, RS)) { - DirtyBBs.insert(&MBB); - break; - } - } - - // Find blocks reachable from the use or def of CSRs/FI. - DenseSet ReachableByDirty; - collectBlocksReachableByDirty(DirtyBBs, ReachableByDirty); - - const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); - SmallVector DirtyPreds; - SmallVector CleanPreds; - if (!checkIfRestoreSplittable(InitRestore, ReachableByDirty, DirtyPreds, - CleanPreds, TII, RS)) - return false; - - // Trying to reach out to the new save point which dominates all dirty blocks. - MachineBasicBlock *NewSave = - FindIDom<>(**DirtyPreds.begin(), DirtyPreds, *MDT, false); - - while (NewSave && (hasDirtyPred(ReachableByDirty, *NewSave) || - EntryFreq < MBFI->getBlockFreq(NewSave).getFrequency())) - NewSave = FindIDom<>(**NewSave->pred_begin(), NewSave->predecessors(), *MDT, - false); - - const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering(); - if (!NewSave || NewSave == InitSave || - isSaveReachableThroughClean(NewSave, CleanPreds) || - !TFI->canUseAsPrologue(*NewSave)) - return false; - - // Now we know that splitting a restore point can isolate the restore point - // from clean blocks and doing so can shrink the save point. - MachineBasicBlock *NewRestore = - tryToSplitRestore(InitRestore, DirtyPreds, TII); - - // Make sure if the new restore point is valid as an epilogue, depending on - // targets. - if (!TFI->canUseAsEpilogue(*NewRestore)) { - rollbackRestoreSplit(MF, NewRestore, InitRestore, DirtyPreds, TII); - return false; - } - - Save = NewSave; - Restore = NewRestore; - - MDT->runOnMachineFunction(MF); - MPDT->runOnMachineFunction(MF); - - assert((MDT->dominates(Save, Restore) && MPDT->dominates(Restore, Save)) && - "Incorrect save or restore point due to dominance relations"); - assert((!MLI->getLoopFor(Save) && !MLI->getLoopFor(Restore)) && - "Unexpected save or restore point in a loop"); - assert((EntryFreq >= MBFI->getBlockFreq(Save).getFrequency() && - EntryFreq >= MBFI->getBlockFreq(Restore).getFrequency()) && - "Incorrect save or restore point based on block frequency"); - return true; -} - void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB, RegScavenger *RS) { // Get rid of the easy cases first. @@ -775,7 +463,31 @@ static bool giveUpWithRemarks(MachineOptimizationRemarkEmitter *ORE, return false; } -bool ShrinkWrap::performShrinkWrapping(MachineFunction &MF, RegScavenger *RS) { +bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) { + if (skipFunction(MF.getFunction()) || MF.empty() || !isShrinkWrapEnabled(MF)) + return false; + + LLVM_DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n'); + + init(MF); + + ReversePostOrderTraversal RPOT(&*MF.begin()); + if (containsIrreducibleCFG(RPOT, *MLI)) { + // If MF is irreducible, a block may be in a loop without + // MachineLoopInfo reporting it. I.e., we may use the + // post-dominance property in loops, which lead to incorrect + // results. Moreover, we may miss that the prologue and + // epilogue are not in the same loop, leading to unbalanced + // construction/deconstruction of the stack frame. + return giveUpWithRemarks(ORE, "UnsupportedIrreducibleCFG", + "Irreducible CFGs are not supported yet.", + MF.getFunction().getSubprogram(), &MF.front()); + } + + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); + std::unique_ptr RS( + TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : nullptr); + for (MachineBasicBlock &MBB : MF) { LLVM_DEBUG(dbgs() << "Look into: " << MBB.getNumber() << ' ' << MBB.getName() << '\n'); @@ -791,7 +503,7 @@ bool ShrinkWrap::performShrinkWrapping(MachineFunction &MF, RegScavenger *RS) { // are at least at the boundary of the save and restore points. The // problem is that a basic block can jump out from the middle in these // cases, which we do not handle. - updateSaveRestorePoints(MBB, RS); + updateSaveRestorePoints(MBB, RS.get()); if (!ArePointsInteresting()) { LLVM_DEBUG(dbgs() << "EHPad/inlineasm_br prevents shrink-wrapping\n"); return false; @@ -800,11 +512,11 @@ bool ShrinkWrap::performShrinkWrapping(MachineFunction &MF, RegScavenger *RS) { } for (const MachineInstr &MI : MBB) { - if (!useOrDefCSROrFI(MI, RS)) + if (!useOrDefCSROrFI(MI, RS.get())) continue; // Save (resp. restore) point must dominate (resp. post dominate) // MI. Look for the proper basic block for those. - updateSaveRestorePoints(MBB, RS); + updateSaveRestorePoints(MBB, RS.get()); // If we are at a point where we cannot improve the placement of // save/restore instructions, just give up. if (!ArePointsInteresting()) { @@ -858,49 +570,13 @@ bool ShrinkWrap::performShrinkWrapping(MachineFunction &MF, RegScavenger *RS) { break; NewBB = Restore; } - updateSaveRestorePoints(*NewBB, RS); + updateSaveRestorePoints(*NewBB, RS.get()); } while (Save && Restore); if (!ArePointsInteresting()) { ++NumCandidatesDropped; return false; } - return true; -} - -bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) { - if (skipFunction(MF.getFunction()) || MF.empty() || !isShrinkWrapEnabled(MF)) - return false; - - LLVM_DEBUG(dbgs() << "**** Analysing " << MF.getName() << '\n'); - - init(MF); - - ReversePostOrderTraversal RPOT(&*MF.begin()); - if (containsIrreducibleCFG(RPOT, *MLI)) { - // If MF is irreducible, a block may be in a loop without - // MachineLoopInfo reporting it. I.e., we may use the - // post-dominance property in loops, which lead to incorrect - // results. Moreover, we may miss that the prologue and - // epilogue are not in the same loop, leading to unbalanced - // construction/deconstruction of the stack frame. - return giveUpWithRemarks(ORE, "UnsupportedIrreducibleCFG", - "Irreducible CFGs are not supported yet.", - MF.getFunction().getSubprogram(), &MF.front()); - } - - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - std::unique_ptr RS( - TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : nullptr); - - bool Changed = false; - - bool HasCandidate = performShrinkWrapping(MF, RS.get()); - Changed = postShrinkWrapping(HasCandidate, MF, RS.get()); - if (!HasCandidate && !Changed) - return false; - if (!ArePointsInteresting()) - return Changed; LLVM_DEBUG(dbgs() << "Final shrink wrap candidates:\nSave: " << Save->getNumber() << ' ' << Save->getName() @@ -911,7 +587,7 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) { MFI.setSavePoint(Save); MFI.setRestorePoint(Restore); ++NumCandidates; - return Changed; + return false; } bool ShrinkWrap::isShrinkWrapEnabled(const MachineFunction &MF) { diff --git a/llvm/test/CodeGen/AArch64/dont-shrink-wrap-stack-mayloadorstore.mir b/llvm/test/CodeGen/AArch64/dont-shrink-wrap-stack-mayloadorstore.mir index 34fafb750083c..bc60b7b571197 100644 --- a/llvm/test/CodeGen/AArch64/dont-shrink-wrap-stack-mayloadorstore.mir +++ b/llvm/test/CodeGen/AArch64/dont-shrink-wrap-stack-mayloadorstore.mir @@ -6,8 +6,8 @@ ; RUN: llc -x=mir -simplify-mir -run-pass=shrink-wrap -o - %s | FileCheck %s ; CHECK: name: compiler_pop_stack ; CHECK: frameInfo: - ; CHECK: savePoint: '%bb.1' - ; CHECK-NEXT: restorePoint: '%bb.7' + ; CHECK-NOT: savePoint: + ; CHECK-NOT: restorePoint: ; CHECK: stack: ; CHECK: name: f ; CHECK: frameInfo: diff --git a/llvm/test/CodeGen/AArch64/shrinkwrap-split-restore-point.mir b/llvm/test/CodeGen/AArch64/shrinkwrap-split-restore-point.mir deleted file mode 100644 index fc44f8ed97941..0000000000000 --- a/llvm/test/CodeGen/AArch64/shrinkwrap-split-restore-point.mir +++ /dev/null @@ -1,686 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2 -# RUN: llc -mtriple=aarch64 -run-pass=shrink-wrap -o - %s | FileCheck %s - ---- | - define void @shrink_test1(i32 %a) { - entry: - %cmp5 = icmp sgt i32 %a, 0 - br i1 %cmp5, label %BB0, label %exit - - BB0: ; preds = %entry - %call = call i32 @fun() - %c = icmp eq i32 %call, 0 - br i1 %c, label %BB1, label %exit - - BB1: ; preds = %BB0 - %call2 = call i32 @fun() - br label %exit - - exit: ; preds = %BB1, %BB0, %entry - ret void - } - - define void @shrink_test2(i32 %a, ptr %P1, ptr %P2) { - BB00: - %cmp5 = icmp sgt i32 %a, 0 - br i1 %cmp5, label %BB01, label %exit - - BB01: ; preds = %BB00 - store i32 %a, ptr %P1, align 4 - %c1 = icmp sgt i32 %a, 1 - br i1 %c1, label %BB02, label %BB03 - - BB02: ; preds = %BB01 - store i32 %a, ptr %P2, align 4 - br label %BB03 - - BB03: ; preds = %BB02, %BB01 - %call03 = call i32 @fun() - %c03 = icmp eq i32 %call03, 0 - br i1 %c03, label %BB04, label %BB05 - - BB04: ; preds = %BB03 - %call04 = call i32 @fun() - br label %BB05 - - BB05: ; preds = %BB04, %BB03 - %call05 = call i32 @fun() - %c05 = icmp eq i32 %call05, 0 - br i1 %c05, label %BB06, label %BB07 - - BB06: ; preds = %BB05 - %call06 = call i32 @fun() - br label %exit - - BB07: ; preds = %BB05 - %call07 = call i32 @fun2() - br label %exit - - exit: ; preds = %BB07, %BB06, %BB00 - ret void - } - - define void @noshrink_test1(i32 %a, i32 %v, i32 %v2) { - entry: - %cmp5 = icmp sgt i32 %a, 0 - br i1 %cmp5, label %BB0, label %exit - - BB0: ; preds = %entry - %c = icmp eq i32 %a, 10 - %c1 = icmp eq i32 %v, 10 - %or.cond = select i1 %c, i1 %c1, i1 false - br i1 %or.cond, label %BB3, label %BB2 - - BB2: ; preds = %BB0 - %c2 = icmp eq i32 %v2, 10 - br i1 %c2, label %BB4, label %exit - - BB3: ; preds = %BB0 - %call3 = call i32 @fun() - br label %exit - - BB4: ; preds = %BB2 - %call4 = call i32 @fun2() - br label %exit - - exit: ; preds = %BB4, %BB3, %BB2, %entry - ret void - } - - define void @noshrink_test2(i32 %a) { - BB00: - %cmp5 = icmp sgt i32 %a, 0 - br i1 %cmp5, label %BB01, label %InfLoop.preheader - - InfLoop.preheader: ; preds = %BB00 - br label %InfLoop - - BB01: ; preds = %BB00 - %call = call i32 @fun() - %c = icmp eq i32 %call, 0 - br i1 %c, label %BB02, label %exit - - BB02: ; preds = %BB01 - %call2 = call i32 @fun() - br label %exit - - InfLoop: ; preds = %InfLoop.preheader, %InfLoop - %call3 = call i32 @fun() - br label %InfLoop - - exit: ; preds = %BB02, %BB01 - ret void - } - - define void @noshrink_test3(i32 %a) { - BB00: - %cmp5 = icmp sgt i32 %a, 0 - %call02 = call i32 @fun() - br i1 %cmp5, label %BB02, label %BB01 - - BB01: ; preds = %BB00 - %0 = icmp eq i32 %call02, 0 - br i1 %0, label %BB01.1, label %exit - - BB01.1: ; preds = %BB01 - call void @abort() #0 - unreachable - - BB02: ; preds = %BB00 - %1 = icmp eq i32 %call02, 0 - br i1 %1, label %BB03, label %BB04 - - BB03: ; preds = %BB02 - %call03 = call i32 @fun() - %c03 = icmp eq i32 %call03, 0 - br i1 %c03, label %BB04, label %exit - - BB04: ; preds = %BB03, %BB02 - %call04 = call i32 @fun() - br label %exit - - exit: ; preds = %BB04, %BB03, %BB01 - ret void - } - - declare i32 @fun() - declare i32 @fun2() - declare void @abort() - -... ---- -name: shrink_test1 -alignment: 4 -tracksRegLiveness: true -tracksDebugUserValues: true -liveins: - - { reg: '$w0' } -frameInfo: - maxAlignment: 1 - adjustsStack: true - hasCalls: true - maxCallFrameSize: 0 -machineFunctionInfo: {} -body: | - ; CHECK-LABEL: name: shrink_test1 - ; CHECK: bb.0.entry: - ; CHECK-NEXT: successors: %bb.1(0x50000000), %bb.3(0x30000000) - ; CHECK-NEXT: liveins: $w0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: dead $wzr = SUBSWri killed renamable $w0, 1, 0, implicit-def $nzcv - ; CHECK-NEXT: Bcc 11, %bb.3, implicit killed $nzcv - ; CHECK-NEXT: B %bb.1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1.BB0: - ; CHECK-NEXT: successors: %bb.2(0x30000000), %bb.4(0x50000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp - ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0 - ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp - ; CHECK-NEXT: CBNZW killed renamable $w0, %bb.4 - ; CHECK-NEXT: B %bb.2 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.2.BB1: - ; CHECK-NEXT: successors: %bb.4(0x80000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp - ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 - ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.4: - ; CHECK-NEXT: successors: %bb.3(0x80000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: B %bb.3 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.3.exit: - ; CHECK-NEXT: RET_ReallyLR - bb.0.entry: - successors: %bb.1(0x50000000), %bb.3(0x30000000) - liveins: $w0 - - dead $wzr = SUBSWri killed renamable $w0, 1, 0, implicit-def $nzcv - Bcc 11, %bb.3, implicit killed $nzcv - B %bb.1 - - bb.1.BB0: - successors: %bb.2(0x30000000), %bb.3(0x50000000) - - ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp - BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0 - ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp - CBNZW killed renamable $w0, %bb.3 - B %bb.2 - - bb.2.BB1: - ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp - BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 - ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp - - bb.3.exit: - RET_ReallyLR - -... ---- -name: shrink_test2 -alignment: 4 -tracksRegLiveness: true -tracksDebugUserValues: true -liveins: - - { reg: '$w0' } - - { reg: '$x1' } - - { reg: '$x2' } -frameInfo: - maxAlignment: 1 - adjustsStack: true - hasCalls: true - maxCallFrameSize: 0 -machineFunctionInfo: {} -body: | - ; CHECK-LABEL: name: shrink_test2 - ; CHECK: bb.0.BB00: - ; CHECK-NEXT: successors: %bb.1(0x50000000), %bb.8(0x30000000) - ; CHECK-NEXT: liveins: $w0, $x1, $x2 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: dead $wzr = SUBSWri renamable $w0, 1, 0, implicit-def $nzcv - ; CHECK-NEXT: Bcc 11, %bb.8, implicit killed $nzcv - ; CHECK-NEXT: B %bb.1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1.BB01: - ; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) - ; CHECK-NEXT: liveins: $w0, $x1, $x2 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: dead $wzr = SUBSWri renamable $w0, 2, 0, implicit-def $nzcv - ; CHECK-NEXT: STRWui renamable $w0, killed renamable $x1, 0 :: (store (s32) into %ir.P1) - ; CHECK-NEXT: Bcc 11, %bb.3, implicit killed $nzcv - ; CHECK-NEXT: B %bb.2 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.2.BB02: - ; CHECK-NEXT: successors: %bb.3(0x80000000) - ; CHECK-NEXT: liveins: $w0, $x2 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: STRWui killed renamable $w0, killed renamable $x2, 0 :: (store (s32) into %ir.P2) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.3.BB03: - ; CHECK-NEXT: successors: %bb.4(0x30000000), %bb.5(0x50000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp - ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0 - ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp - ; CHECK-NEXT: CBNZW killed renamable $w0, %bb.5 - ; CHECK-NEXT: B %bb.4 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.4.BB04: - ; CHECK-NEXT: successors: %bb.5(0x80000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp - ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 - ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.5.BB05: - ; CHECK-NEXT: successors: %bb.6(0x30000000), %bb.7(0x50000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp - ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0 - ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp - ; CHECK-NEXT: CBNZW killed renamable $w0, %bb.7 - ; CHECK-NEXT: B %bb.6 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.6.BB06: - ; CHECK-NEXT: successors: %bb.9(0x80000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp - ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 - ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp - ; CHECK-NEXT: B %bb.9 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.7.BB07: - ; CHECK-NEXT: successors: %bb.9(0x80000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp - ; CHECK-NEXT: BL @fun2, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 - ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.9: - ; CHECK-NEXT: successors: %bb.8(0x80000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: B %bb.8 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.8.exit: - ; CHECK-NEXT: RET_ReallyLR - bb.0.BB00: - successors: %bb.1(0x50000000), %bb.8(0x30000000) - liveins: $w0, $x1, $x2 - - dead $wzr = SUBSWri renamable $w0, 1, 0, implicit-def $nzcv - Bcc 11, %bb.8, implicit killed $nzcv - B %bb.1 - - bb.1.BB01: - successors: %bb.2, %bb.3 - liveins: $w0, $x1, $x2 - - dead $wzr = SUBSWri renamable $w0, 2, 0, implicit-def $nzcv - STRWui renamable $w0, killed renamable $x1, 0 :: (store (s32) into %ir.P1) - Bcc 11, %bb.3, implicit killed $nzcv - B %bb.2 - - bb.2.BB02: - liveins: $w0, $x2 - - STRWui killed renamable $w0, killed renamable $x2, 0 :: (store (s32) into %ir.P2) - - bb.3.BB03: - successors: %bb.4(0x30000000), %bb.5(0x50000000) - - ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp - BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0 - ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp - CBNZW killed renamable $w0, %bb.5 - B %bb.4 - - bb.4.BB04: - ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp - BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 - ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp - - bb.5.BB05: - successors: %bb.6(0x30000000), %bb.7(0x50000000) - - ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp - BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0 - ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp - CBNZW killed renamable $w0, %bb.7 - B %bb.6 - - bb.6.BB06: - ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp - BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 - ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp - B %bb.8 - - bb.7.BB07: - ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp - BL @fun2, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 - ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp - - bb.8.exit: - RET_ReallyLR - -... ---- -name: noshrink_test1 -alignment: 4 -tracksRegLiveness: true -tracksDebugUserValues: true -liveins: - - { reg: '$w0' } - - { reg: '$w1' } - - { reg: '$w2' } -frameInfo: - maxAlignment: 1 - adjustsStack: true - hasCalls: true - maxCallFrameSize: 0 -machineFunctionInfo: {} -body: | - ; CHECK-LABEL: name: noshrink_test1 - ; CHECK: bb.0.entry: - ; CHECK-NEXT: successors: %bb.1(0x50000000), %bb.6(0x30000000) - ; CHECK-NEXT: liveins: $w0, $w1, $w2 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: dead $wzr = SUBSWri renamable $w0, 1, 0, implicit-def $nzcv - ; CHECK-NEXT: Bcc 11, %bb.6, implicit killed $nzcv - ; CHECK-NEXT: B %bb.1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1.BB0: - ; CHECK-NEXT: successors: %bb.2(0x60000000), %bb.3(0x20000000) - ; CHECK-NEXT: liveins: $w0, $w1, $w2 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: dead $wzr = SUBSWri killed renamable $w0, 10, 0, implicit-def $nzcv - ; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv - ; CHECK-NEXT: B %bb.2 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.2.BB0: - ; CHECK-NEXT: successors: %bb.4(0x55555555), %bb.3(0x2aaaaaab) - ; CHECK-NEXT: liveins: $w1, $w2 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: dead $wzr = SUBSWri killed renamable $w1, 10, 0, implicit-def $nzcv - ; CHECK-NEXT: Bcc 0, %bb.4, implicit killed $nzcv - ; CHECK-NEXT: B %bb.3 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.3.BB2: - ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000) - ; CHECK-NEXT: liveins: $w2 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: dead $wzr = SUBSWri killed renamable $w2, 10, 0, implicit-def $nzcv - ; CHECK-NEXT: Bcc 0, %bb.5, implicit killed $nzcv - ; CHECK-NEXT: B %bb.6 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.4.BB3: - ; CHECK-NEXT: successors: %bb.6(0x80000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp - ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 - ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp - ; CHECK-NEXT: B %bb.6 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.5.BB4: - ; CHECK-NEXT: successors: %bb.6(0x80000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp - ; CHECK-NEXT: BL @fun2, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 - ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.6.exit: - ; CHECK-NEXT: RET_ReallyLR - bb.0.entry: - successors: %bb.1(0x50000000), %bb.6(0x30000000) - liveins: $w0, $w1, $w2 - - dead $wzr = SUBSWri renamable $w0, 1, 0, implicit-def $nzcv - Bcc 11, %bb.6, implicit killed $nzcv - B %bb.1 - - bb.1.BB0: - successors: %bb.2(0x60000000), %bb.3(0x20000000) - liveins: $w0, $w1, $w2 - - dead $wzr = SUBSWri killed renamable $w0, 10, 0, implicit-def $nzcv - Bcc 1, %bb.3, implicit killed $nzcv - B %bb.2 - - bb.2.BB0: - successors: %bb.4(0x55555555), %bb.3(0x2aaaaaab) - liveins: $w1, $w2 - - dead $wzr = SUBSWri killed renamable $w1, 10, 0, implicit-def $nzcv - Bcc 0, %bb.4, implicit killed $nzcv - B %bb.3 - - bb.3.BB2: - liveins: $w2 - - dead $wzr = SUBSWri killed renamable $w2, 10, 0, implicit-def $nzcv - Bcc 0, %bb.5, implicit killed $nzcv - B %bb.6 - - bb.4.BB3: - ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp - BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 - ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp - B %bb.6 - - bb.5.BB4: - ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp - BL @fun2, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 - ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp - - bb.6.exit: - RET_ReallyLR - -... ---- -name: noshrink_test2 -alignment: 4 -tracksRegLiveness: true -tracksDebugUserValues: true -liveins: - - { reg: '$w0' } -frameInfo: - maxAlignment: 1 - adjustsStack: true - hasCalls: true - maxCallFrameSize: 0 -machineFunctionInfo: {} -body: | - ; CHECK-LABEL: name: noshrink_test2 - ; CHECK: bb.0.BB00: - ; CHECK-NEXT: successors: %bb.2(0x50000000), %bb.1(0x30000000) - ; CHECK-NEXT: liveins: $w0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: dead $wzr = SUBSWri killed renamable $w0, 0, 0, implicit-def $nzcv - ; CHECK-NEXT: Bcc 12, %bb.2, implicit killed $nzcv - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1: - ; CHECK-NEXT: successors: %bb.4(0x80000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: B %bb.4 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.2.BB01: - ; CHECK-NEXT: successors: %bb.3(0x30000000), %bb.5(0x50000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp - ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0 - ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp - ; CHECK-NEXT: CBNZW killed renamable $w0, %bb.5 - ; CHECK-NEXT: B %bb.3 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.3.BB02: - ; CHECK-NEXT: successors: %bb.5(0x80000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp - ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 - ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp - ; CHECK-NEXT: B %bb.5 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.4.InfLoop: - ; CHECK-NEXT: successors: %bb.4(0x80000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp - ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 - ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp - ; CHECK-NEXT: B %bb.4 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.5.exit: - ; CHECK-NEXT: RET_ReallyLR - bb.0.BB00: - successors: %bb.2(0x50000000), %bb.1(0x30000000) - liveins: $w0 - - dead $wzr = SUBSWri killed renamable $w0, 0, 0, implicit-def $nzcv - Bcc 12, %bb.2, implicit killed $nzcv - - bb.1: - B %bb.4 - - bb.2.BB01: - successors: %bb.3(0x30000000), %bb.5(0x50000000) - - ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp - BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0 - ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp - CBNZW killed renamable $w0, %bb.5 - B %bb.3 - - bb.3.BB02: - ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp - BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 - ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp - B %bb.5 - - bb.4.InfLoop: - ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp - BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 - ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp - B %bb.4 - - bb.5.exit: - RET_ReallyLR - -... ---- -name: noshrink_test3 -alignment: 4 -tracksRegLiveness: true -tracksDebugUserValues: true -liveins: - - { reg: '$w0' } -frameInfo: - maxAlignment: 1 - adjustsStack: true - hasCalls: true - maxCallFrameSize: 0 -machineFunctionInfo: {} -body: | - ; CHECK-LABEL: name: noshrink_test3 - ; CHECK: bb.0.BB00: - ; CHECK-NEXT: successors: %bb.3(0x50000000), %bb.1(0x30000000) - ; CHECK-NEXT: liveins: $w0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: renamable $w19 = COPY $w0 - ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp - ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0 - ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp - ; CHECK-NEXT: dead $wzr = SUBSWri killed renamable $w19, 0, 0, implicit-def $nzcv - ; CHECK-NEXT: Bcc 12, %bb.3, implicit killed $nzcv - ; CHECK-NEXT: B %bb.1 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.1.BB01: - ; CHECK-NEXT: successors: %bb.2(0x00000800), %bb.6(0x7ffff800) - ; CHECK-NEXT: liveins: $w0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: CBNZW killed renamable $w0, %bb.6 - ; CHECK-NEXT: B %bb.2 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.2.BB01.1: - ; CHECK-NEXT: successors: %bb.3(0x80000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp - ; CHECK-NEXT: BL @abort, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp - ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.3.BB02: - ; CHECK-NEXT: successors: %bb.4(0x30000000), %bb.5(0x50000000) - ; CHECK-NEXT: liveins: $w0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: CBNZW killed renamable $w0, %bb.5 - ; CHECK-NEXT: B %bb.4 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.4.BB03: - ; CHECK-NEXT: successors: %bb.5(0x30000000), %bb.6(0x50000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp - ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0 - ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp - ; CHECK-NEXT: CBNZW killed renamable $w0, %bb.6 - ; CHECK-NEXT: B %bb.5 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.5.BB04: - ; CHECK-NEXT: successors: %bb.6(0x80000000) - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp - ; CHECK-NEXT: BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 - ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: bb.6.exit: - ; CHECK-NEXT: RET_ReallyLR - bb.0.BB00: - successors: %bb.3(0x50000000), %bb.1(0x30000000) - liveins: $w0 - - renamable $w19 = COPY $w0 - ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp - BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0 - ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp - dead $wzr = SUBSWri killed renamable $w19, 0, 0, implicit-def $nzcv - Bcc 12, %bb.3, implicit killed $nzcv - B %bb.1 - - bb.1.BB01: - successors: %bb.2(0x00000800), %bb.6(0x7ffff800) - liveins: $w0 - - CBNZW killed renamable $w0, %bb.6 - B %bb.2 - - bb.2.BB01.1: - - ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp - BL @abort, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp - ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp - - bb.3.BB02: - successors: %bb.4(0x30000000), %bb.5(0x50000000) - liveins: $w0 - - CBNZW killed renamable $w0, %bb.5 - B %bb.4 - - bb.4.BB03: - successors: %bb.5(0x30000000), %bb.6(0x50000000) - - ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp - BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $w0 - ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp - CBNZW killed renamable $w0, %bb.6 - B %bb.5 - - bb.5.BB04: - ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp - BL @fun, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def dead $w0 - ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp - - bb.6.exit: - RET_ReallyLR - -... diff --git a/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll b/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll index 58ab0f5250d00..3d60686d5a116 100644 --- a/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll +++ b/llvm/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll @@ -5,11 +5,11 @@ define i32 @add_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture readonly %arg2, ptr nocapture readonly %arg3) { ; CHECK-LE-LABEL: add_user: ; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: .save {r4, lr} +; CHECK-LE-NEXT: push {r4, lr} ; CHECK-LE-NEXT: cmp r0, #1 ; CHECK-LE-NEXT: blt .LBB0_4 ; CHECK-LE-NEXT: @ %bb.1: @ %for.body.preheader -; CHECK-LE-NEXT: .save {r4, lr} -; CHECK-LE-NEXT: push {r4, lr} ; CHECK-LE-NEXT: subs r2, #2 ; CHECK-LE-NEXT: subs r3, #2 ; CHECK-LE-NEXT: mov.w r12, #0 @@ -23,24 +23,23 @@ define i32 @add_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture reado ; CHECK-LE-NEXT: smlad r12, r4, lr, r12 ; CHECK-LE-NEXT: sxtah r1, r1, lr ; CHECK-LE-NEXT: bne .LBB0_2 -; CHECK-LE-NEXT: @ %bb.3: -; CHECK-LE-NEXT: pop.w {r4, lr} +; CHECK-LE-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-LE-NEXT: add.w r0, r12, r1 -; CHECK-LE-NEXT: bx lr +; CHECK-LE-NEXT: pop {r4, pc} ; CHECK-LE-NEXT: .p2align 2 ; CHECK-LE-NEXT: .LBB0_4: ; CHECK-LE-NEXT: mov.w r12, #0 ; CHECK-LE-NEXT: movs r1, #0 ; CHECK-LE-NEXT: add.w r0, r12, r1 -; CHECK-LE-NEXT: bx lr +; CHECK-LE-NEXT: pop {r4, pc} ; ; CHECK-BE-LABEL: add_user: ; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: .save {r4, r5, r6, lr} +; CHECK-BE-NEXT: push {r4, r5, r6, lr} ; CHECK-BE-NEXT: cmp r0, #1 ; CHECK-BE-NEXT: blt .LBB0_4 ; CHECK-BE-NEXT: @ %bb.1: @ %for.body.preheader -; CHECK-BE-NEXT: .save {r4, r5, r6, lr} -; CHECK-BE-NEXT: push {r4, r5, r6, lr} ; CHECK-BE-NEXT: subs r2, #2 ; CHECK-BE-NEXT: subs r3, #2 ; CHECK-BE-NEXT: mov.w r12, #0 @@ -57,16 +56,15 @@ define i32 @add_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture reado ; CHECK-BE-NEXT: subs r0, #1 ; CHECK-BE-NEXT: smlabb r12, r6, r4, r5 ; CHECK-BE-NEXT: bne .LBB0_2 -; CHECK-BE-NEXT: @ %bb.3: -; CHECK-BE-NEXT: pop.w {r4, r5, r6, lr} +; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-BE-NEXT: add.w r0, r12, r1 -; CHECK-BE-NEXT: bx lr +; CHECK-BE-NEXT: pop {r4, r5, r6, pc} ; CHECK-BE-NEXT: .p2align 2 ; CHECK-BE-NEXT: .LBB0_4: ; CHECK-BE-NEXT: mov.w r12, #0 ; CHECK-BE-NEXT: movs r1, #0 ; CHECK-BE-NEXT: add.w r0, r12, r1 -; CHECK-BE-NEXT: bx lr +; CHECK-BE-NEXT: pop {r4, r5, r6, pc} entry: %cmp24 = icmp sgt i32 %arg, 0 br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup @@ -111,11 +109,11 @@ for.body: define i32 @mul_bottom_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture readonly %arg2, ptr nocapture readonly %arg3) { ; CHECK-LE-LABEL: mul_bottom_user: ; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: .save {r4, r5, r7, lr} +; CHECK-LE-NEXT: push {r4, r5, r7, lr} ; CHECK-LE-NEXT: cmp r0, #1 ; CHECK-LE-NEXT: blt .LBB1_4 ; CHECK-LE-NEXT: @ %bb.1: @ %for.body.preheader -; CHECK-LE-NEXT: .save {r4, r5, r7, lr} -; CHECK-LE-NEXT: push {r4, r5, r7, lr} ; CHECK-LE-NEXT: sub.w lr, r2, #2 ; CHECK-LE-NEXT: subs r3, #2 ; CHECK-LE-NEXT: mov.w r12, #0 @@ -130,24 +128,23 @@ define i32 @mul_bottom_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocaptur ; CHECK-LE-NEXT: subs r0, #1 ; CHECK-LE-NEXT: mul r1, r5, r1 ; CHECK-LE-NEXT: bne .LBB1_2 -; CHECK-LE-NEXT: @ %bb.3: -; CHECK-LE-NEXT: pop.w {r4, r5, r7, lr} +; CHECK-LE-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-LE-NEXT: add.w r0, r12, r1 -; CHECK-LE-NEXT: bx lr +; CHECK-LE-NEXT: pop {r4, r5, r7, pc} ; CHECK-LE-NEXT: .p2align 2 ; CHECK-LE-NEXT: .LBB1_4: ; CHECK-LE-NEXT: mov.w r12, #0 ; CHECK-LE-NEXT: movs r1, #0 ; CHECK-LE-NEXT: add.w r0, r12, r1 -; CHECK-LE-NEXT: bx lr +; CHECK-LE-NEXT: pop {r4, r5, r7, pc} ; ; CHECK-BE-LABEL: mul_bottom_user: ; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: .save {r4, r5, r6, lr} +; CHECK-BE-NEXT: push {r4, r5, r6, lr} ; CHECK-BE-NEXT: cmp r0, #1 ; CHECK-BE-NEXT: blt .LBB1_4 ; CHECK-BE-NEXT: @ %bb.1: @ %for.body.preheader -; CHECK-BE-NEXT: .save {r4, r5, r6, lr} -; CHECK-BE-NEXT: push {r4, r5, r6, lr} ; CHECK-BE-NEXT: subs r2, #2 ; CHECK-BE-NEXT: subs r3, #2 ; CHECK-BE-NEXT: mov.w r12, #0 @@ -164,16 +161,15 @@ define i32 @mul_bottom_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocaptur ; CHECK-BE-NEXT: subs r0, #1 ; CHECK-BE-NEXT: mul r1, lr, r1 ; CHECK-BE-NEXT: bne .LBB1_2 -; CHECK-BE-NEXT: @ %bb.3: -; CHECK-BE-NEXT: pop.w {r4, r5, r6, lr} +; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-BE-NEXT: add.w r0, r12, r1 -; CHECK-BE-NEXT: bx lr +; CHECK-BE-NEXT: pop {r4, r5, r6, pc} ; CHECK-BE-NEXT: .p2align 2 ; CHECK-BE-NEXT: .LBB1_4: ; CHECK-BE-NEXT: mov.w r12, #0 ; CHECK-BE-NEXT: movs r1, #0 ; CHECK-BE-NEXT: add.w r0, r12, r1 -; CHECK-BE-NEXT: bx lr +; CHECK-BE-NEXT: pop {r4, r5, r6, pc} entry: %cmp24 = icmp sgt i32 %arg, 0 br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup @@ -218,11 +214,11 @@ for.body: define i32 @mul_top_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture readonly %arg2, ptr nocapture readonly %arg3) { ; CHECK-LE-LABEL: mul_top_user: ; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: .save {r4, lr} +; CHECK-LE-NEXT: push {r4, lr} ; CHECK-LE-NEXT: cmp r0, #1 ; CHECK-LE-NEXT: blt .LBB2_4 ; CHECK-LE-NEXT: @ %bb.1: @ %for.body.preheader -; CHECK-LE-NEXT: .save {r4, lr} -; CHECK-LE-NEXT: push {r4, lr} ; CHECK-LE-NEXT: subs r2, #2 ; CHECK-LE-NEXT: subs r3, #2 ; CHECK-LE-NEXT: mov.w r12, #0 @@ -237,24 +233,23 @@ define i32 @mul_top_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture r ; CHECK-LE-NEXT: asr.w r4, r4, #16 ; CHECK-LE-NEXT: mul r1, r4, r1 ; CHECK-LE-NEXT: bne .LBB2_2 -; CHECK-LE-NEXT: @ %bb.3: -; CHECK-LE-NEXT: pop.w {r4, lr} +; CHECK-LE-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-LE-NEXT: add.w r0, r12, r1 -; CHECK-LE-NEXT: bx lr +; CHECK-LE-NEXT: pop {r4, pc} ; CHECK-LE-NEXT: .p2align 2 ; CHECK-LE-NEXT: .LBB2_4: ; CHECK-LE-NEXT: mov.w r12, #0 ; CHECK-LE-NEXT: movs r1, #0 ; CHECK-LE-NEXT: add.w r0, r12, r1 -; CHECK-LE-NEXT: bx lr +; CHECK-LE-NEXT: pop {r4, pc} ; ; CHECK-BE-LABEL: mul_top_user: ; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: .save {r4, r5, r6, lr} +; CHECK-BE-NEXT: push {r4, r5, r6, lr} ; CHECK-BE-NEXT: cmp r0, #1 ; CHECK-BE-NEXT: blt .LBB2_4 ; CHECK-BE-NEXT: @ %bb.1: @ %for.body.preheader -; CHECK-BE-NEXT: .save {r4, r5, r6, lr} -; CHECK-BE-NEXT: push {r4, r5, r6, lr} ; CHECK-BE-NEXT: subs r2, #2 ; CHECK-BE-NEXT: subs r3, #2 ; CHECK-BE-NEXT: mov.w r12, #0 @@ -271,16 +266,15 @@ define i32 @mul_top_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture r ; CHECK-BE-NEXT: subs r0, #1 ; CHECK-BE-NEXT: mul r1, r6, r1 ; CHECK-BE-NEXT: bne .LBB2_2 -; CHECK-BE-NEXT: @ %bb.3: -; CHECK-BE-NEXT: pop.w {r4, r5, r6, lr} +; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-BE-NEXT: add.w r0, r12, r1 -; CHECK-BE-NEXT: bx lr +; CHECK-BE-NEXT: pop {r4, r5, r6, pc} ; CHECK-BE-NEXT: .p2align 2 ; CHECK-BE-NEXT: .LBB2_4: ; CHECK-BE-NEXT: mov.w r12, #0 ; CHECK-BE-NEXT: movs r1, #0 ; CHECK-BE-NEXT: add.w r0, r12, r1 -; CHECK-BE-NEXT: bx lr +; CHECK-BE-NEXT: pop {r4, r5, r6, pc} entry: %cmp24 = icmp sgt i32 %arg, 0 br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup @@ -325,11 +319,11 @@ for.body: define i32 @and_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture readonly %arg2, ptr nocapture readonly %arg3) { ; CHECK-LE-LABEL: and_user: ; CHECK-LE: @ %bb.0: @ %entry +; CHECK-LE-NEXT: .save {r4, lr} +; CHECK-LE-NEXT: push {r4, lr} ; CHECK-LE-NEXT: cmp r0, #1 ; CHECK-LE-NEXT: blt .LBB3_4 ; CHECK-LE-NEXT: @ %bb.1: @ %for.body.preheader -; CHECK-LE-NEXT: .save {r4, lr} -; CHECK-LE-NEXT: push {r4, lr} ; CHECK-LE-NEXT: sub.w lr, r2, #2 ; CHECK-LE-NEXT: subs r3, #2 ; CHECK-LE-NEXT: mov.w r12, #0 @@ -344,24 +338,23 @@ define i32 @and_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture reado ; CHECK-LE-NEXT: uxth r2, r2 ; CHECK-LE-NEXT: mul r1, r2, r1 ; CHECK-LE-NEXT: bne .LBB3_2 -; CHECK-LE-NEXT: @ %bb.3: -; CHECK-LE-NEXT: pop.w {r4, lr} +; CHECK-LE-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-LE-NEXT: add.w r0, r12, r1 -; CHECK-LE-NEXT: bx lr +; CHECK-LE-NEXT: pop {r4, pc} ; CHECK-LE-NEXT: .p2align 2 ; CHECK-LE-NEXT: .LBB3_4: ; CHECK-LE-NEXT: mov.w r12, #0 ; CHECK-LE-NEXT: movs r1, #0 ; CHECK-LE-NEXT: add.w r0, r12, r1 -; CHECK-LE-NEXT: bx lr +; CHECK-LE-NEXT: pop {r4, pc} ; ; CHECK-BE-LABEL: and_user: ; CHECK-BE: @ %bb.0: @ %entry +; CHECK-BE-NEXT: .save {r4, r5, r6, lr} +; CHECK-BE-NEXT: push {r4, r5, r6, lr} ; CHECK-BE-NEXT: cmp r0, #1 ; CHECK-BE-NEXT: blt .LBB3_4 ; CHECK-BE-NEXT: @ %bb.1: @ %for.body.preheader -; CHECK-BE-NEXT: .save {r4, r5, r6, lr} -; CHECK-BE-NEXT: push {r4, r5, r6, lr} ; CHECK-BE-NEXT: subs r2, #2 ; CHECK-BE-NEXT: subs r3, #2 ; CHECK-BE-NEXT: mov.w r12, #0 @@ -378,16 +371,15 @@ define i32 @and_user(i32 %arg, ptr nocapture readnone %arg1, ptr nocapture reado ; CHECK-BE-NEXT: subs r0, #1 ; CHECK-BE-NEXT: mul r1, lr, r1 ; CHECK-BE-NEXT: bne .LBB3_2 -; CHECK-BE-NEXT: @ %bb.3: -; CHECK-BE-NEXT: pop.w {r4, r5, r6, lr} +; CHECK-BE-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-BE-NEXT: add.w r0, r12, r1 -; CHECK-BE-NEXT: bx lr +; CHECK-BE-NEXT: pop {r4, r5, r6, pc} ; CHECK-BE-NEXT: .p2align 2 ; CHECK-BE-NEXT: .LBB3_4: ; CHECK-BE-NEXT: mov.w r12, #0 ; CHECK-BE-NEXT: movs r1, #0 ; CHECK-BE-NEXT: add.w r0, r12, r1 -; CHECK-BE-NEXT: bx lr +; CHECK-BE-NEXT: pop {r4, r5, r6, pc} entry: %cmp24 = icmp sgt i32 %arg, 0 br i1 %cmp24, label %for.body.preheader, label %for.cond.cleanup diff --git a/llvm/test/CodeGen/ARM/ssat-unroll-loops.ll b/llvm/test/CodeGen/ARM/ssat-unroll-loops.ll index c9724674afd82..2755d354a6244 100644 --- a/llvm/test/CodeGen/ARM/ssat-unroll-loops.ll +++ b/llvm/test/CodeGen/ARM/ssat-unroll-loops.ll @@ -6,11 +6,11 @@ define void @ssat_unroll(ptr %pSrcA, ptr %pSrcB, ptr %pDst, i32 %blockSize) { ; CHECK-LABEL: ssat_unroll: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: bxeq lr -; CHECK-NEXT: .LBB0_1: @ %while.body.preheader ; CHECK-NEXT: .save {r11, lr} ; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: beq .LBB0_5 +; CHECK-NEXT: @ %bb.1: @ %while.body.preheader ; CHECK-NEXT: sub r12, r3, #1 ; CHECK-NEXT: tst r3, #1 ; CHECK-NEXT: beq .LBB0_3 @@ -23,7 +23,7 @@ define void @ssat_unroll(ptr %pSrcA, ptr %pSrcB, ptr %pDst, i32 %blockSize) { ; CHECK-NEXT: mov r3, r12 ; CHECK-NEXT: .LBB0_3: @ %while.body.prol.loopexit ; CHECK-NEXT: cmp r12, #0 -; CHECK-NEXT: beq .LBB0_5 +; CHECK-NEXT: popeq {r11, pc} ; CHECK-NEXT: .LBB0_4: @ %while.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldrsh r12, [r0] @@ -41,9 +41,8 @@ define void @ssat_unroll(ptr %pSrcA, ptr %pSrcB, ptr %pDst, i32 %blockSize) { ; CHECK-NEXT: strh r12, [r2, #2] ; CHECK-NEXT: add r2, r2, #4 ; CHECK-NEXT: bne .LBB0_4 -; CHECK-NEXT: .LBB0_5: -; CHECK-NEXT: pop {r11, lr} -; CHECK-NEXT: bx lr +; CHECK-NEXT: .LBB0_5: @ %while.end +; CHECK-NEXT: pop {r11, pc} entry: %cmp.not7 = icmp eq i32 %blockSize, 0 br i1 %cmp.not7, label %while.end, label %while.body.preheader @@ -126,11 +125,11 @@ while.end: ; preds = %while.body, %while. define void @ssat_unroll_minmax(ptr nocapture readonly %pSrcA, ptr nocapture readonly %pSrcB, ptr nocapture writeonly %pDst, i32 %blockSize) { ; CHECK-LABEL: ssat_unroll_minmax: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: bxeq lr -; CHECK-NEXT: .LBB1_1: @ %while.body.preheader ; CHECK-NEXT: .save {r11, lr} ; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: beq .LBB1_5 +; CHECK-NEXT: @ %bb.1: @ %while.body.preheader ; CHECK-NEXT: sub r12, r3, #1 ; CHECK-NEXT: tst r3, #1 ; CHECK-NEXT: beq .LBB1_3 @@ -143,7 +142,7 @@ define void @ssat_unroll_minmax(ptr nocapture readonly %pSrcA, ptr nocapture rea ; CHECK-NEXT: mov r3, r12 ; CHECK-NEXT: .LBB1_3: @ %while.body.prol.loopexit ; CHECK-NEXT: cmp r12, #0 -; CHECK-NEXT: beq .LBB1_5 +; CHECK-NEXT: popeq {r11, pc} ; CHECK-NEXT: .LBB1_4: @ %while.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldrsh r12, [r0] @@ -161,9 +160,8 @@ define void @ssat_unroll_minmax(ptr nocapture readonly %pSrcA, ptr nocapture rea ; CHECK-NEXT: strh r12, [r2, #2] ; CHECK-NEXT: add r2, r2, #4 ; CHECK-NEXT: bne .LBB1_4 -; CHECK-NEXT: .LBB1_5: -; CHECK-NEXT: pop {r11, lr} -; CHECK-NEXT: bx lr +; CHECK-NEXT: .LBB1_5: @ %while.end +; CHECK-NEXT: pop {r11, pc} entry: %cmp.not7 = icmp eq i32 %blockSize, 0 br i1 %cmp.not7, label %while.end, label %while.body.preheader diff --git a/llvm/test/CodeGen/LoongArch/jump-table.ll b/llvm/test/CodeGen/LoongArch/jump-table.ll index c3028bade3c30..8bd4c952cf1ee 100644 --- a/llvm/test/CodeGen/LoongArch/jump-table.ll +++ b/llvm/test/CodeGen/LoongArch/jump-table.ll @@ -98,7 +98,7 @@ define void @switch_4_arms(i32 %in, ptr %out) nounwind { ; LA32-JT-NEXT: b .LBB0_5 ; LA32-JT-NEXT: .LBB0_4: # %bb4 ; LA32-JT-NEXT: ori $a0, $zero, 1 -; LA32-JT-NEXT: .LBB0_5: +; LA32-JT-NEXT: .LBB0_5: # %exit ; LA32-JT-NEXT: st.w $a0, $a1, 0 ; LA32-JT-NEXT: .LBB0_6: # %exit ; LA32-JT-NEXT: ret @@ -123,7 +123,7 @@ define void @switch_4_arms(i32 %in, ptr %out) nounwind { ; LA64-JT-NEXT: b .LBB0_5 ; LA64-JT-NEXT: .LBB0_4: # %bb4 ; LA64-JT-NEXT: ori $a0, $zero, 1 -; LA64-JT-NEXT: .LBB0_5: +; LA64-JT-NEXT: .LBB0_5: # %exit ; LA64-JT-NEXT: st.w $a0, $a1, 0 ; LA64-JT-NEXT: .LBB0_6: # %exit ; LA64-JT-NEXT: ret diff --git a/llvm/test/CodeGen/PowerPC/common-chain-aix32.ll b/llvm/test/CodeGen/PowerPC/common-chain-aix32.ll index 35ddcfd9ba6d6..0cf7119eab84c 100644 --- a/llvm/test/CodeGen/PowerPC/common-chain-aix32.ll +++ b/llvm/test/CodeGen/PowerPC/common-chain-aix32.ll @@ -39,19 +39,19 @@ define i64 @two_chain_same_offset_succ_i32(ptr %p, i32 %offset, i32 %base1, i64 ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: cmplwi r6, 0 ; CHECK-NEXT: cmpwi cr1, r6, 0 +; CHECK-NEXT: stw r30, -8(r1) # 4-byte Folded Spill +; CHECK-NEXT: stw r31, -4(r1) # 4-byte Folded Spill ; CHECK-NEXT: crandc 4*cr5+lt, 4*cr1+lt, eq ; CHECK-NEXT: cmpwi cr1, r7, 0 -; CHECK-NEXT: bc 12, 4*cr5+lt, L..BB0_6 +; CHECK-NEXT: bc 12, 4*cr5+lt, L..BB0_5 ; CHECK-NEXT: # %bb.1: # %entry ; CHECK-NEXT: crand 4*cr5+lt, eq, 4*cr1+eq -; CHECK-NEXT: bc 12, 4*cr5+lt, L..BB0_6 +; CHECK-NEXT: bc 12, 4*cr5+lt, L..BB0_5 ; CHECK-NEXT: # %bb.2: # %for.body.preheader ; CHECK-NEXT: slwi r8, r4, 1 ; CHECK-NEXT: li r10, 0 ; CHECK-NEXT: li r11, 0 -; CHECK-NEXT: stw r30, -8(r1) # 4-byte Folded Spill ; CHECK-NEXT: add r8, r4, r8 -; CHECK-NEXT: stw r31, -4(r1) # 4-byte Folded Spill ; CHECK-NEXT: add r9, r5, r8 ; CHECK-NEXT: add r5, r5, r4 ; CHECK-NEXT: add r8, r3, r5 @@ -83,15 +83,15 @@ define i64 @two_chain_same_offset_succ_i32(ptr %p, i32 %offset, i32 %base1, i64 ; CHECK-NEXT: # ; CHECK-NEXT: crand 4*cr5+lt, eq, 4*cr1+lt ; CHECK-NEXT: bc 12, 4*cr5+lt, L..BB0_3 -; CHECK-NEXT: # %bb.5: +; CHECK-NEXT: b L..BB0_6 +; CHECK-NEXT: L..BB0_5: +; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: li r5, 0 +; CHECK-NEXT: L..BB0_6: # %for.cond.cleanup ; CHECK-NEXT: lwz r31, -4(r1) # 4-byte Folded Reload ; CHECK-NEXT: lwz r30, -8(r1) # 4-byte Folded Reload ; CHECK-NEXT: mr r4, r5 ; CHECK-NEXT: blr -; CHECK-NEXT: L..BB0_6: -; CHECK-NEXT: li r3, 0 -; CHECK-NEXT: li r4, 0 -; CHECK-NEXT: blr entry: %add = add nsw i32 %base1, %offset %mul = shl nsw i32 %offset, 1 diff --git a/llvm/test/CodeGen/PowerPC/common-chain.ll b/llvm/test/CodeGen/PowerPC/common-chain.ll index 5f8c21e30f8fd..ea8a72e7d11e1 100644 --- a/llvm/test/CodeGen/PowerPC/common-chain.ll +++ b/llvm/test/CodeGen/PowerPC/common-chain.ll @@ -137,14 +137,14 @@ define i64 @not_perfect_chain_all_same_offset_fail(ptr %p, i64 %offset, i64 %bas ; CHECK-LABEL: not_perfect_chain_all_same_offset_fail: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: cmpdi r6, 0 +; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-NEXT: ble cr0, .LBB1_4 ; CHECK-NEXT: # %bb.1: # %for.body.preheader -; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-NEXT: sldi r7, r4, 1 +; CHECK-NEXT: sldi r9, r4, 2 ; CHECK-NEXT: add r5, r3, r5 ; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: add r8, r4, r7 -; CHECK-NEXT: sldi r9, r4, 2 ; CHECK-NEXT: mtctr r6 ; CHECK-NEXT: add r10, r4, r9 ; CHECK-NEXT: .p2align 4 @@ -161,11 +161,12 @@ define i64 @not_perfect_chain_all_same_offset_fail(ptr %p, i64 %offset, i64 %bas ; CHECK-NEXT: mulld r6, r6, r0 ; CHECK-NEXT: maddld r3, r6, r30, r3 ; CHECK-NEXT: bdnz .LBB1_2 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: # %bb.3: # %for.cond.cleanup ; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; CHECK-NEXT: blr ; CHECK-NEXT: .LBB1_4: ; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; CHECK-NEXT: blr entry: %mul = shl nsw i64 %offset, 1 @@ -424,20 +425,20 @@ define i64 @not_same_offset_fail(ptr %p, i64 %offset, i64 %base1, i64 %n) { ; CHECK-LABEL: not_same_offset_fail: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: cmpdi r6, 0 -; CHECK-NEXT: ble cr0, .LBB4_4 -; CHECK-NEXT: # %bb.1: # %for.body.preheader ; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill -; CHECK-NEXT: add r5, r3, r5 -; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; CHECK-NEXT: mtctr r6 +; CHECK-NEXT: ble cr0, .LBB4_3 +; CHECK-NEXT: # %bb.1: # %for.body.preheader ; CHECK-NEXT: mulli r11, r4, 10 ; CHECK-NEXT: sldi r8, r4, 2 +; CHECK-NEXT: add r5, r3, r5 +; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: add r8, r4, r8 ; CHECK-NEXT: sldi r9, r4, 3 -; CHECK-NEXT: sub r10, r9, r4 +; CHECK-NEXT: mtctr r6 ; CHECK-NEXT: sldi r7, r4, 1 +; CHECK-NEXT: sub r10, r9, r4 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB4_2: # %for.body ; CHECK-NEXT: # @@ -454,14 +455,14 @@ define i64 @not_same_offset_fail(ptr %p, i64 %offset, i64 %base1, i64 %n) { ; CHECK-NEXT: mulld r6, r6, r29 ; CHECK-NEXT: maddld r3, r6, r28, r3 ; CHECK-NEXT: bdnz .LBB4_2 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: b .LBB4_4 +; CHECK-NEXT: .LBB4_3: +; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: .LBB4_4: # %for.cond.cleanup ; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r28, -32(r1) # 8-byte Folded Reload ; CHECK-NEXT: blr -; CHECK-NEXT: .LBB4_4: -; CHECK-NEXT: li r3, 0 -; CHECK-NEXT: blr entry: %mul = shl nsw i64 %offset, 1 %mul2 = mul nsw i64 %offset, 5 diff --git a/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll b/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll index 37baef6043884..769b358131e9a 100644 --- a/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll +++ b/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll @@ -192,21 +192,21 @@ define i64 @test_max_number_reminder(ptr %arg, i32 signext %arg1) { ; CHECK-LABEL: test_max_number_reminder: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: cmplwi r4, 0 -; CHECK-NEXT: beq cr0, .LBB2_4 +; CHECK-NEXT: std r25, -56(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r26, -48(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r27, -40(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-NEXT: beq cr0, .LBB2_3 ; CHECK-NEXT: # %bb.1: # %bb3.preheader ; CHECK-NEXT: cmpldi r4, 1 ; CHECK-NEXT: li r5, 1 ; CHECK-NEXT: addi r9, r3, 4002 -; CHECK-NEXT: std r25, -56(r1) # 8-byte Folded Spill ; CHECK-NEXT: li r6, -1 -; CHECK-NEXT: std r26, -48(r1) # 8-byte Folded Spill ; CHECK-NEXT: li r7, 3 ; CHECK-NEXT: li r8, 5 ; CHECK-NEXT: li r10, 9 -; CHECK-NEXT: std r27, -40(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-NEXT: iselgt r3, r4, r5 ; CHECK-NEXT: mtctr r3 ; CHECK-NEXT: li r3, 0 @@ -232,7 +232,10 @@ define i64 @test_max_number_reminder(ptr %arg, i32 signext %arg1) { ; CHECK-NEXT: mulld r11, r11, r26 ; CHECK-NEXT: maddld r3, r11, r25, r3 ; CHECK-NEXT: bdnz .LBB2_2 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: b .LBB2_4 +; CHECK-NEXT: .LBB2_3: +; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: .LBB2_4: # %bb45 ; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r28, -32(r1) # 8-byte Folded Reload @@ -241,9 +244,6 @@ define i64 @test_max_number_reminder(ptr %arg, i32 signext %arg1) { ; CHECK-NEXT: ld r26, -48(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r25, -56(r1) # 8-byte Folded Reload ; CHECK-NEXT: blr -; CHECK-NEXT: .LBB2_4: -; CHECK-NEXT: addi r3, r4, 0 -; CHECK-NEXT: blr bb: %i = sext i32 %arg1 to i64 %i2 = icmp eq i32 %arg1, 0 @@ -475,11 +475,11 @@ define dso_local i64 @test_ds_multiple_chains(ptr %arg, ptr %arg1, i32 signext % ; CHECK-LABEL: test_ds_multiple_chains: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: cmplwi r5, 0 -; CHECK-NEXT: beq cr0, .LBB5_4 +; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-NEXT: beq cr0, .LBB5_3 ; CHECK-NEXT: # %bb.1: # %bb4.preheader ; CHECK-NEXT: cmpldi r5, 1 ; CHECK-NEXT: li r6, 1 -; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-NEXT: addi r3, r3, 4001 ; CHECK-NEXT: addi r4, r4, 4001 ; CHECK-NEXT: li r7, 9 @@ -507,13 +507,13 @@ define dso_local i64 @test_ds_multiple_chains(ptr %arg, ptr %arg1, i32 signext % ; CHECK-NEXT: mulld r8, r8, r30 ; CHECK-NEXT: maddld r6, r8, r9, r6 ; CHECK-NEXT: bdnz .LBB5_2 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: b .LBB5_4 +; CHECK-NEXT: .LBB5_3: +; CHECK-NEXT: li r6, 0 +; CHECK-NEXT: .LBB5_4: # %bb43 ; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; CHECK-NEXT: add r3, r6, r5 ; CHECK-NEXT: blr -; CHECK-NEXT: .LBB5_4: -; CHECK-NEXT: addi r3, r5, 0 -; CHECK-NEXT: blr bb: %i = sext i32 %arg2 to i64 %i3 = icmp eq i32 %arg2, 0 @@ -595,17 +595,17 @@ define i64 @test_ds_cross_basic_blocks(ptr %arg, i32 signext %arg1) { ; CHECK-LABEL: test_ds_cross_basic_blocks: ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: cmplwi r4, 0 -; CHECK-NEXT: beq cr0, .LBB6_9 +; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill +; CHECK-NEXT: beq cr0, .LBB6_8 ; CHECK-NEXT: # %bb.1: # %bb3 ; CHECK-NEXT: addis r5, r2, .LC0@toc@ha ; CHECK-NEXT: cmpldi r4, 1 ; CHECK-NEXT: li r7, 1 ; CHECK-NEXT: addi r6, r3, 4009 -; CHECK-NEXT: std r28, -32(r1) # 8-byte Folded Spill ; CHECK-NEXT: ld r5, .LC0@toc@l(r5) ; CHECK-NEXT: iselgt r3, r4, r7 -; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-NEXT: li r4, -7 ; CHECK-NEXT: li r8, -6 ; CHECK-NEXT: li r9, 1 @@ -634,7 +634,7 @@ define i64 @test_ds_cross_basic_blocks(ptr %arg, i32 signext %arg1) { ; CHECK-NEXT: mulld r0, r0, r10 ; CHECK-NEXT: mulld r0, r0, r9 ; CHECK-NEXT: maddld r3, r0, r7, r3 -; CHECK-NEXT: bdz .LBB6_8 +; CHECK-NEXT: bdz .LBB6_9 ; CHECK-NEXT: .LBB6_4: # %bb5 ; CHECK-NEXT: # ; CHECK-NEXT: lbzu r0, 1(r5) @@ -666,13 +666,12 @@ define i64 @test_ds_cross_basic_blocks(ptr %arg, i32 signext %arg1) { ; CHECK-NEXT: add r7, r0, r7 ; CHECK-NEXT: b .LBB6_3 ; CHECK-NEXT: .LBB6_8: +; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: .LBB6_9: # %bb64 ; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload ; CHECK-NEXT: ld r28, -32(r1) # 8-byte Folded Reload ; CHECK-NEXT: blr -; CHECK-NEXT: .LBB6_9: -; CHECK-NEXT: li r3, 0 -; CHECK-NEXT: blr bb: %i = sext i32 %arg1 to i64 %i2 = icmp eq i32 %arg1, 0 diff --git a/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll b/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll index 79f2ef3e3746a..b91f20b710a2d 100644 --- a/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll +++ b/llvm/test/CodeGen/PowerPC/lsr-profitable-chain.ll @@ -6,24 +6,24 @@ define void @foo(ptr readonly %0, ptr %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6 ; CHECK-LABEL: foo: ; CHECK: # %bb.0: ; CHECK-NEXT: cmpd 5, 7 -; CHECK-NEXT: bgelr 0 -; CHECK-NEXT: # %bb.1: # %.preheader +; CHECK-NEXT: std 22, -80(1) # 8-byte Folded Spill +; CHECK-NEXT: std 23, -72(1) # 8-byte Folded Spill +; CHECK-NEXT: std 24, -64(1) # 8-byte Folded Spill +; CHECK-NEXT: std 25, -56(1) # 8-byte Folded Spill +; CHECK-NEXT: std 26, -48(1) # 8-byte Folded Spill ; CHECK-NEXT: std 27, -40(1) # 8-byte Folded Spill -; CHECK-NEXT: addi 27, 5, 2 ; CHECK-NEXT: std 28, -32(1) # 8-byte Folded Spill -; CHECK-NEXT: addi 28, 5, 3 +; CHECK-NEXT: std 29, -24(1) # 8-byte Folded Spill ; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill +; CHECK-NEXT: bge 0, .LBB0_6 +; CHECK-NEXT: # %bb.1: # %.preheader ; CHECK-NEXT: addi 30, 5, 1 +; CHECK-NEXT: addi 28, 5, 3 +; CHECK-NEXT: addi 27, 5, 2 ; CHECK-NEXT: mulld 12, 8, 5 -; CHECK-NEXT: mulld 0, 9, 8 -; CHECK-NEXT: std 29, -24(1) # 8-byte Folded Spill ; CHECK-NEXT: addi 29, 3, 16 +; CHECK-NEXT: mulld 0, 9, 8 ; CHECK-NEXT: sldi 11, 10, 3 -; CHECK-NEXT: std 22, -80(1) # 8-byte Folded Spill -; CHECK-NEXT: std 23, -72(1) # 8-byte Folded Spill -; CHECK-NEXT: std 24, -64(1) # 8-byte Folded Spill -; CHECK-NEXT: std 25, -56(1) # 8-byte Folded Spill -; CHECK-NEXT: std 26, -48(1) # 8-byte Folded Spill ; CHECK-NEXT: mulld 30, 8, 30 ; CHECK-NEXT: mulld 28, 8, 28 ; CHECK-NEXT: mulld 8, 8, 27 diff --git a/llvm/test/CodeGen/PowerPC/shrink-wrap.ll b/llvm/test/CodeGen/PowerPC/shrink-wrap.ll index 612c3fdb6b9bf..98fa21c359054 100644 --- a/llvm/test/CodeGen/PowerPC/shrink-wrap.ll +++ b/llvm/test/CodeGen/PowerPC/shrink-wrap.ll @@ -28,18 +28,27 @@ entry: ; CHECK-LABEL: {{[\.]?}}shrinkwrapme: ; CHECK: # %bb.0: ; CHECK-NEXT: cmpwi -; CHECKAIX: blt - ; Prolog code -; CHECK: # %bb.1: ; CHECK64-COUNT-18: std + ; CHECK32-COUNT-18: stw +; CHECK: ble 0, {{.*}}BB0_3 +; CHECKAIX: blt 0, {{.*}}BB0_3 +; CHECK: # %bb.1: +; CHECK: li +; CHECK: {{.*}}BB0_2: +; CHECK: add +; CHECK: bdnz {{.*}}BB0_2 +; CHECK-NEXT: b {{.*}}BB0_4 +; CHECK: {{.*}}BB0_3: +; CHECK-NEXT: li +; CHECK: {{.*}}BB0_4: + ; Epilog code ; CHECK64-COUNT-18: ld +; ; CHECK32-COUNT-18: lwz -; CHECK: blr -; CHECK: {{.*}}BB0_4: -; CHECK-NEXT: li +; CHECK: blr } diff --git a/llvm/test/CodeGen/PowerPC/shrink-wrap.mir b/llvm/test/CodeGen/PowerPC/shrink-wrap.mir index cd0a0d95f827d..f0540adad4937 100644 --- a/llvm/test/CodeGen/PowerPC/shrink-wrap.mir +++ b/llvm/test/CodeGen/PowerPC/shrink-wrap.mir @@ -1,4 +1,3 @@ -# NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py # RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple powerpc64le-unknown-linux-gnu \ # RUN: -run-pass=shrink-wrap -o - %s | FileCheck %s # RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple powerpc-ibm-aix-xcoff \ @@ -123,15 +122,13 @@ body: | BDNZ8 %bb.4, implicit-def dead $ctr8, implicit $ctr8 B %bb.3 - ; CHECK: savePoint: '%bb.2' - ; CHECK-NEXT: restorePoint: '%bb.5' + ; CHECK: savePoint: '' + ; CHECK-NEXT: restorePoint: '' ; CHECK: bb.4.for.body: - ; CHECK-NEXT: successors: %bb.4(0x7c000000), %bb.5(0x04000000) + ; CHECK-NEXT: successors: %bb.4(0x7c000000), %bb.3(0x04000000) ; CHECK-NEXT: liveins: $r4, $x3 ; CHECK: INLINEASM ; CHECK-NEXT: BDNZ8 %bb.4 - ; CHECK-NEXT: B %bb.5 + ; CHECK-NEXT: B %bb.3 ... -## NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -# CHECK: {{.*}} diff --git a/llvm/test/CodeGen/RISCV/aext-to-sext.ll b/llvm/test/CodeGen/RISCV/aext-to-sext.ll index 0aa04f40f6a52..806c495fa6777 100644 --- a/llvm/test/CodeGen/RISCV/aext-to-sext.ll +++ b/llvm/test/CodeGen/RISCV/aext-to-sext.ll @@ -11,22 +11,21 @@ define void @quux(i32 signext %arg, i32 signext %arg1) nounwind { ; RV64I-LABEL: quux: ; RV64I: # %bb.0: # %bb -; RV64I-NEXT: beq a0, a1, .LBB0_4 -; RV64I-NEXT: # %bb.1: # %bb2.preheader ; RV64I-NEXT: addi sp, sp, -16 ; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: beq a0, a1, .LBB0_3 +; RV64I-NEXT: # %bb.1: # %bb2.preheader ; RV64I-NEXT: subw s0, a1, a0 ; RV64I-NEXT: .LBB0_2: # %bb2 ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-NEXT: call hoge@plt ; RV64I-NEXT: addiw s0, s0, -1 ; RV64I-NEXT: bnez s0, .LBB0_2 -; RV64I-NEXT: # %bb.3: +; RV64I-NEXT: .LBB0_3: # %bb6 ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 -; RV64I-NEXT: .LBB0_4: # %bb6 ; RV64I-NEXT: ret bb: %tmp = icmp eq i32 %arg, %arg1 diff --git a/llvm/test/CodeGen/RISCV/fli-licm.ll b/llvm/test/CodeGen/RISCV/fli-licm.ll index f37ace801b159..93bb934c1cb0d 100644 --- a/llvm/test/CodeGen/RISCV/fli-licm.ll +++ b/llvm/test/CodeGen/RISCV/fli-licm.ll @@ -12,11 +12,11 @@ define void @process_nodes(ptr %0) nounwind { ; RV32-LABEL: process_nodes: ; RV32: # %bb.0: # %entry -; RV32-NEXT: beqz a0, .LBB0_4 -; RV32-NEXT: # %bb.1: # %loop.preheader ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32-NEXT: beqz a0, .LBB0_3 +; RV32-NEXT: # %bb.1: # %loop.preheader ; RV32-NEXT: mv s0, a0 ; RV32-NEXT: .LBB0_2: # %loop ; RV32-NEXT: # =>This Inner Loop Header: Depth=1 @@ -25,20 +25,19 @@ define void @process_nodes(ptr %0) nounwind { ; RV32-NEXT: call do_it@plt ; RV32-NEXT: lw s0, 0(s0) ; RV32-NEXT: bnez s0, .LBB0_2 -; RV32-NEXT: # %bb.3: +; RV32-NEXT: .LBB0_3: # %exit ; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: .LBB0_4: # %exit ; RV32-NEXT: ret ; ; RV64-LABEL: process_nodes: ; RV64: # %bb.0: # %entry -; RV64-NEXT: beqz a0, .LBB0_4 -; RV64-NEXT: # %bb.1: # %loop.preheader ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64-NEXT: beqz a0, .LBB0_3 +; RV64-NEXT: # %bb.1: # %loop.preheader ; RV64-NEXT: mv s0, a0 ; RV64-NEXT: .LBB0_2: # %loop ; RV64-NEXT: # =>This Inner Loop Header: Depth=1 @@ -47,11 +46,10 @@ define void @process_nodes(ptr %0) nounwind { ; RV64-NEXT: call do_it@plt ; RV64-NEXT: ld s0, 0(s0) ; RV64-NEXT: bnez s0, .LBB0_2 -; RV64-NEXT: # %bb.3: +; RV64-NEXT: .LBB0_3: # %exit ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64-NEXT: ld s0, 0(sp) # 8-byte Folded Reload ; RV64-NEXT: addi sp, sp, 16 -; RV64-NEXT: .LBB0_4: # %exit ; RV64-NEXT: ret entry: %1 = icmp eq ptr %0, null diff --git a/llvm/test/CodeGen/RISCV/jumptable.ll b/llvm/test/CodeGen/RISCV/jumptable.ll index abbedf015064d..4cc17cee230e7 100644 --- a/llvm/test/CodeGen/RISCV/jumptable.ll +++ b/llvm/test/CodeGen/RISCV/jumptable.ll @@ -97,7 +97,7 @@ define void @above_threshold(i32 signext %in, ptr %out) nounwind { ; RV32I-SMALL-NEXT: j .LBB1_8 ; RV32I-SMALL-NEXT: .LBB1_7: # %bb6 ; RV32I-SMALL-NEXT: li a0, 200 -; RV32I-SMALL-NEXT: .LBB1_8: +; RV32I-SMALL-NEXT: .LBB1_8: # %exit ; RV32I-SMALL-NEXT: sw a0, 0(a1) ; RV32I-SMALL-NEXT: .LBB1_9: # %exit ; RV32I-SMALL-NEXT: ret @@ -132,7 +132,7 @@ define void @above_threshold(i32 signext %in, ptr %out) nounwind { ; RV32I-MEDIUM-NEXT: j .LBB1_8 ; RV32I-MEDIUM-NEXT: .LBB1_7: # %bb6 ; RV32I-MEDIUM-NEXT: li a0, 200 -; RV32I-MEDIUM-NEXT: .LBB1_8: +; RV32I-MEDIUM-NEXT: .LBB1_8: # %exit ; RV32I-MEDIUM-NEXT: sw a0, 0(a1) ; RV32I-MEDIUM-NEXT: .LBB1_9: # %exit ; RV32I-MEDIUM-NEXT: ret @@ -168,7 +168,7 @@ define void @above_threshold(i32 signext %in, ptr %out) nounwind { ; RV32I-PIC-NEXT: j .LBB1_8 ; RV32I-PIC-NEXT: .LBB1_7: # %bb6 ; RV32I-PIC-NEXT: li a0, 200 -; RV32I-PIC-NEXT: .LBB1_8: +; RV32I-PIC-NEXT: .LBB1_8: # %exit ; RV32I-PIC-NEXT: sw a0, 0(a1) ; RV32I-PIC-NEXT: .LBB1_9: # %exit ; RV32I-PIC-NEXT: ret @@ -202,7 +202,7 @@ define void @above_threshold(i32 signext %in, ptr %out) nounwind { ; RV64I-SMALL-NEXT: j .LBB1_8 ; RV64I-SMALL-NEXT: .LBB1_7: # %bb6 ; RV64I-SMALL-NEXT: li a0, 200 -; RV64I-SMALL-NEXT: .LBB1_8: +; RV64I-SMALL-NEXT: .LBB1_8: # %exit ; RV64I-SMALL-NEXT: sw a0, 0(a1) ; RV64I-SMALL-NEXT: .LBB1_9: # %exit ; RV64I-SMALL-NEXT: ret @@ -237,7 +237,7 @@ define void @above_threshold(i32 signext %in, ptr %out) nounwind { ; RV64I-MEDIUM-NEXT: j .LBB1_8 ; RV64I-MEDIUM-NEXT: .LBB1_7: # %bb6 ; RV64I-MEDIUM-NEXT: li a0, 200 -; RV64I-MEDIUM-NEXT: .LBB1_8: +; RV64I-MEDIUM-NEXT: .LBB1_8: # %exit ; RV64I-MEDIUM-NEXT: sw a0, 0(a1) ; RV64I-MEDIUM-NEXT: .LBB1_9: # %exit ; RV64I-MEDIUM-NEXT: ret @@ -273,7 +273,7 @@ define void @above_threshold(i32 signext %in, ptr %out) nounwind { ; RV64I-PIC-NEXT: j .LBB1_8 ; RV64I-PIC-NEXT: .LBB1_7: # %bb6 ; RV64I-PIC-NEXT: li a0, 200 -; RV64I-PIC-NEXT: .LBB1_8: +; RV64I-PIC-NEXT: .LBB1_8: # %exit ; RV64I-PIC-NEXT: sw a0, 0(a1) ; RV64I-PIC-NEXT: .LBB1_9: # %exit ; RV64I-PIC-NEXT: ret diff --git a/llvm/test/CodeGen/Thumb/thumb-shrink-wrapping.ll b/llvm/test/CodeGen/Thumb/thumb-shrink-wrapping.ll index 640615facbd18..07e19dd58fe5a 100644 --- a/llvm/test/CodeGen/Thumb/thumb-shrink-wrapping.ll +++ b/llvm/test/CodeGen/Thumb/thumb-shrink-wrapping.ll @@ -1445,15 +1445,15 @@ if.end: define i1 @beq_to_bx(ptr %y, i32 %head) { ; ENABLE-V4T-LABEL: beq_to_bx: ; ENABLE-V4T: @ %bb.0: @ %entry -; ENABLE-V4T-NEXT: movs r2, r0 -; ENABLE-V4T-NEXT: movs r0, #1 -; ENABLE-V4T-NEXT: cmp r2, #0 -; ENABLE-V4T-NEXT: beq LBB11_4 -; ENABLE-V4T-NEXT: @ %bb.1: @ %if.end ; ENABLE-V4T-NEXT: push {r4, lr} ; ENABLE-V4T-NEXT: .cfi_def_cfa_offset 8 ; ENABLE-V4T-NEXT: .cfi_offset lr, -4 ; ENABLE-V4T-NEXT: .cfi_offset r4, -8 +; ENABLE-V4T-NEXT: movs r2, r0 +; ENABLE-V4T-NEXT: movs r0, #1 +; ENABLE-V4T-NEXT: cmp r2, #0 +; ENABLE-V4T-NEXT: beq LBB11_3 +; ENABLE-V4T-NEXT: @ %bb.1: @ %if.end ; ENABLE-V4T-NEXT: ldr r3, [r2] ; ENABLE-V4T-NEXT: lsls r4, r3, #30 ; ENABLE-V4T-NEXT: bpl LBB11_3 @@ -1461,24 +1461,22 @@ define i1 @beq_to_bx(ptr %y, i32 %head) { ; ENABLE-V4T-NEXT: str r1, [r2] ; ENABLE-V4T-NEXT: str r3, [r2] ; ENABLE-V4T-NEXT: movs r0, #0 -; ENABLE-V4T-NEXT: LBB11_3: +; ENABLE-V4T-NEXT: LBB11_3: @ %cleanup ; ENABLE-V4T-NEXT: pop {r4} ; ENABLE-V4T-NEXT: pop {r1} -; ENABLE-V4T-NEXT: mov lr, r1 -; ENABLE-V4T-NEXT: LBB11_4: @ %cleanup -; ENABLE-V4T-NEXT: bx lr +; ENABLE-V4T-NEXT: bx r1 ; ; ENABLE-V5T-LABEL: beq_to_bx: ; ENABLE-V5T: @ %bb.0: @ %entry -; ENABLE-V5T-NEXT: movs r2, r0 -; ENABLE-V5T-NEXT: movs r0, #1 -; ENABLE-V5T-NEXT: cmp r2, #0 -; ENABLE-V5T-NEXT: beq LBB11_4 -; ENABLE-V5T-NEXT: @ %bb.1: @ %if.end ; ENABLE-V5T-NEXT: push {r4, lr} ; ENABLE-V5T-NEXT: .cfi_def_cfa_offset 8 ; ENABLE-V5T-NEXT: .cfi_offset lr, -4 ; ENABLE-V5T-NEXT: .cfi_offset r4, -8 +; ENABLE-V5T-NEXT: movs r2, r0 +; ENABLE-V5T-NEXT: movs r0, #1 +; ENABLE-V5T-NEXT: cmp r2, #0 +; ENABLE-V5T-NEXT: beq LBB11_3 +; ENABLE-V5T-NEXT: @ %bb.1: @ %if.end ; ENABLE-V5T-NEXT: ldr r3, [r2] ; ENABLE-V5T-NEXT: lsls r4, r3, #30 ; ENABLE-V5T-NEXT: bpl LBB11_3 @@ -1486,10 +1484,8 @@ define i1 @beq_to_bx(ptr %y, i32 %head) { ; ENABLE-V5T-NEXT: str r1, [r2] ; ENABLE-V5T-NEXT: str r3, [r2] ; ENABLE-V5T-NEXT: movs r0, #0 -; ENABLE-V5T-NEXT: LBB11_3: +; ENABLE-V5T-NEXT: LBB11_3: @ %cleanup ; ENABLE-V5T-NEXT: pop {r4, pc} -; ENABLE-V5T-NEXT: LBB11_4: @ %cleanup -; ENABLE-V5T-NEXT: bx lr ; ; DISABLE-V4T-LABEL: beq_to_bx: ; DISABLE-V4T: @ %bb.0: @ %entry diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inlineasm.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inlineasm.ll index 421b5b5364d35..d67e66d7a7131 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inlineasm.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/inlineasm.ll @@ -4,13 +4,11 @@ define i32 @test(ptr nocapture readonly %x, ptr nocapture readonly %y, i32 %n) { ; CHECK-LABEL: test: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: cmp r2, #1 -; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: bxlt lr -; CHECK-NEXT: .LBB0_1: @ %for.body.preheader ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: cmp r2, #1 +; CHECK-NEXT: blt .LBB0_4 +; CHECK-NEXT: @ %bb.1: @ %for.body.preheader ; CHECK-NEXT: mov lr, r0 ; CHECK-NEXT: movs r0, #0 ; CHECK-NEXT: .LBB0_2: @ %for.body @@ -23,7 +21,10 @@ define i32 @test(ptr nocapture readonly %x, ptr nocapture readonly %y, i32 %n) { ; CHECK-NEXT: @NO_APP ; CHECK-NEXT: add r0, r3 ; CHECK-NEXT: bne .LBB0_2 -; CHECK-NEXT: @ %bb.3: +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup +; CHECK-NEXT: pop {r7, pc} +; CHECK-NEXT: .LBB0_4: +; CHECK-NEXT: movs r0, #0 ; CHECK-NEXT: pop {r7, pc} entry: %cmp9 = icmp sgt i32 %n, 0 @@ -50,13 +51,11 @@ for.body: ; preds = %entry, %for.body define i32 @testlr(ptr nocapture readonly %x, ptr nocapture readonly %y, i32 %n) { ; CHECK-LABEL: testlr: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: cmp r2, #1 -; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: bxlt lr -; CHECK-NEXT: .LBB1_1: @ %for.body.preheader ; CHECK-NEXT: .save {r4, lr} ; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: cmp r2, #1 +; CHECK-NEXT: blt .LBB1_4 +; CHECK-NEXT: @ %bb.1: @ %for.body.preheader ; CHECK-NEXT: mov r3, r0 ; CHECK-NEXT: movs r0, #0 ; CHECK-NEXT: .LBB1_2: @ %for.body @@ -69,7 +68,10 @@ define i32 @testlr(ptr nocapture readonly %x, ptr nocapture readonly %y, i32 %n) ; CHECK-NEXT: @NO_APP ; CHECK-NEXT: add r0, r4 ; CHECK-NEXT: bne .LBB1_2 -; CHECK-NEXT: @ %bb.3: +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup +; CHECK-NEXT: pop {r4, pc} +; CHECK-NEXT: .LBB1_4: +; CHECK-NEXT: movs r0, #0 ; CHECK-NEXT: pop {r4, pc} entry: %cmp9 = icmp sgt i32 %n, 0 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll index 59b32a3f441c1..99d169e63e5a5 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/memcall.ll @@ -4,12 +4,11 @@ define void @test_memcpy(ptr nocapture %x, ptr nocapture readonly %y, i32 %n, i32 %m) { ; CHECK-LABEL: test_memcpy: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: cmp r2, #1 -; CHECK-NEXT: it lt -; CHECK-NEXT: bxlt lr -; CHECK-NEXT: .LBB0_1: @ %for.body.preheader ; CHECK-NEXT: .save {r4, r5, r6, r7, lr} ; CHECK-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-NEXT: cmp r2, #1 +; CHECK-NEXT: blt .LBB0_5 +; CHECK-NEXT: @ %bb.1: @ %for.body.preheader ; CHECK-NEXT: lsl.w r12, r3, #2 ; CHECK-NEXT: movs r7, #0 ; CHECK-NEXT: b .LBB0_2 @@ -32,9 +31,8 @@ define void @test_memcpy(ptr nocapture %x, ptr nocapture readonly %y, i32 %n, i3 ; CHECK-NEXT: vstrb.8 q0, [r5], #16 ; CHECK-NEXT: letp lr, .LBB0_4 ; CHECK-NEXT: b .LBB0_3 -; CHECK-NEXT: .LBB0_5: -; CHECK-NEXT: pop.w {r4, r5, r6, r7, lr} -; CHECK-NEXT: bx lr +; CHECK-NEXT: .LBB0_5: @ %for.cond.cleanup +; CHECK-NEXT: pop {r4, r5, r6, r7, pc} entry: %cmp8 = icmp sgt i32 %n, 0 br i1 %cmp8, label %for.body, label %for.cond.cleanup @@ -57,12 +55,12 @@ for.body: ; preds = %entry, %for.body define void @test_memset(ptr nocapture %x, i32 %n, i32 %m) { ; CHECK-LABEL: test_memset: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: cmp r1, #1 ; CHECK-NEXT: it lt -; CHECK-NEXT: bxlt lr +; CHECK-NEXT: poplt {r7, pc} ; CHECK-NEXT: .LBB1_1: -; CHECK-NEXT: .save {r7, lr} -; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: b .LBB1_2 ; CHECK-NEXT: .LBB1_2: @ %for.body @@ -82,9 +80,8 @@ define void @test_memset(ptr nocapture %x, i32 %n, i32 %m) { ; CHECK-NEXT: vstrb.8 q0, [r12], #16 ; CHECK-NEXT: letp lr, .LBB1_4 ; CHECK-NEXT: b .LBB1_3 -; CHECK-NEXT: .LBB1_5: -; CHECK-NEXT: pop.w {r7, lr} -; CHECK-NEXT: bx lr +; CHECK-NEXT: .LBB1_5: @ %for.cond.cleanup +; CHECK-NEXT: pop {r7, pc} entry: %cmp5 = icmp sgt i32 %n, 0 br i1 %cmp5, label %for.body, label %for.cond.cleanup @@ -105,14 +102,13 @@ for.body: ; preds = %entry, %for.body define void @test_memmove(ptr nocapture %x, ptr nocapture readonly %y, i32 %n, i32 %m) { ; CHECK-LABEL: test_memmove: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: cmp r2, #1 -; CHECK-NEXT: it lt -; CHECK-NEXT: bxlt lr -; CHECK-NEXT: .LBB2_1: @ %for.body.preheader ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, lr} ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} ; CHECK-NEXT: .pad #4 ; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: cmp r2, #1 +; CHECK-NEXT: blt .LBB2_3 +; CHECK-NEXT: @ %bb.1: @ %for.body.preheader ; CHECK-NEXT: mov r8, r3 ; CHECK-NEXT: mov r5, r2 ; CHECK-NEXT: mov r9, r1 @@ -128,10 +124,9 @@ define void @test_memmove(ptr nocapture %x, ptr nocapture readonly %y, i32 %n, i ; CHECK-NEXT: add r6, r4 ; CHECK-NEXT: subs r5, #1 ; CHECK-NEXT: bne .LBB2_2 -; CHECK-NEXT: @ %bb.3: +; CHECK-NEXT: .LBB2_3: @ %for.cond.cleanup ; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, lr} -; CHECK-NEXT: bx lr +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} entry: %cmp8 = icmp sgt i32 %n, 0 br i1 %cmp8, label %for.body, label %for.cond.cleanup diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll index 23eb5900bb7d1..13e39a8f16e33 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-float-loops.ll @@ -4,11 +4,10 @@ define arm_aapcs_vfpcc void @float_float_mul(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture %c, i32 %N) { ; CHECK-LABEL: float_float_mul: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: it eq -; CHECK-NEXT: bxeq lr -; CHECK-NEXT: .LBB0_1: @ %for.body.preheader ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: beq .LBB0_10 +; CHECK-NEXT: @ %bb.1: @ %for.body.preheader ; CHECK-NEXT: cmp r3, #3 ; CHECK-NEXT: bhi .LBB0_3 ; CHECK-NEXT: @ %bb.2: @@ -81,9 +80,8 @@ define arm_aapcs_vfpcc void @float_float_mul(ptr nocapture readonly %a, ptr noca ; CHECK-NEXT: vmul.f32 s0, s2, s0 ; CHECK-NEXT: vstr s0, [r5, #12] ; CHECK-NEXT: bne .LBB0_9 -; CHECK-NEXT: .LBB0_10: -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: bx lr +; CHECK-NEXT: .LBB0_10: @ %for.cond.cleanup +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} ; CHECK-NEXT: .LBB0_11: @ %vector.ph ; CHECK-NEXT: bic r12, r3, #3 ; CHECK-NEXT: movs r6, #1 @@ -217,11 +215,10 @@ for.body: ; preds = %for.body.prol.loope define arm_aapcs_vfpcc void @float_float_add(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture %c, i32 %N) { ; CHECK-LABEL: float_float_add: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: it eq -; CHECK-NEXT: bxeq lr -; CHECK-NEXT: .LBB1_1: @ %for.body.preheader ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: beq .LBB1_10 +; CHECK-NEXT: @ %bb.1: @ %for.body.preheader ; CHECK-NEXT: cmp r3, #3 ; CHECK-NEXT: bhi .LBB1_3 ; CHECK-NEXT: @ %bb.2: @@ -294,9 +291,8 @@ define arm_aapcs_vfpcc void @float_float_add(ptr nocapture readonly %a, ptr noca ; CHECK-NEXT: vadd.f32 s0, s2, s0 ; CHECK-NEXT: vstr s0, [r5, #12] ; CHECK-NEXT: bne .LBB1_9 -; CHECK-NEXT: .LBB1_10: -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: bx lr +; CHECK-NEXT: .LBB1_10: @ %for.cond.cleanup +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} ; CHECK-NEXT: .LBB1_11: @ %vector.ph ; CHECK-NEXT: bic r12, r3, #3 ; CHECK-NEXT: movs r6, #1 @@ -430,11 +426,10 @@ for.body: ; preds = %for.body.prol.loope define arm_aapcs_vfpcc void @float_float_sub(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture %c, i32 %N) { ; CHECK-LABEL: float_float_sub: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: it eq -; CHECK-NEXT: bxeq lr -; CHECK-NEXT: .LBB2_1: @ %for.body.preheader ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: beq .LBB2_10 +; CHECK-NEXT: @ %bb.1: @ %for.body.preheader ; CHECK-NEXT: cmp r3, #3 ; CHECK-NEXT: bhi .LBB2_3 ; CHECK-NEXT: @ %bb.2: @@ -507,9 +502,8 @@ define arm_aapcs_vfpcc void @float_float_sub(ptr nocapture readonly %a, ptr noca ; CHECK-NEXT: vsub.f32 s0, s2, s0 ; CHECK-NEXT: vstr s0, [r5, #12] ; CHECK-NEXT: bne .LBB2_9 -; CHECK-NEXT: .LBB2_10: -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: bx lr +; CHECK-NEXT: .LBB2_10: @ %for.cond.cleanup +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} ; CHECK-NEXT: .LBB2_11: @ %vector.ph ; CHECK-NEXT: bic r12, r3, #3 ; CHECK-NEXT: movs r6, #1 @@ -643,11 +637,10 @@ for.body: ; preds = %for.body.prol.loope define arm_aapcs_vfpcc void @float_int_mul(ptr nocapture readonly %a, ptr nocapture readonly %b, ptr nocapture %c, i32 %N) { ; CHECK-LABEL: float_int_mul: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: it eq -; CHECK-NEXT: bxeq lr -; CHECK-NEXT: .LBB3_1: @ %for.body.preheader ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: beq.w .LBB3_13 +; CHECK-NEXT: @ %bb.1: @ %for.body.preheader ; CHECK-NEXT: cmp r3, #3 ; CHECK-NEXT: bls .LBB3_6 ; CHECK-NEXT: @ %bb.2: @ %vector.memcheck @@ -736,9 +729,8 @@ define arm_aapcs_vfpcc void @float_int_mul(ptr nocapture readonly %a, ptr nocapt ; CHECK-NEXT: vmul.f32 s0, s2, s0 ; CHECK-NEXT: vstr s0, [r6, #12] ; CHECK-NEXT: bne .LBB3_12 -; CHECK-NEXT: .LBB3_13: -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: bx lr +; CHECK-NEXT: .LBB3_13: @ %for.cond.cleanup +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} entry: %cmp8 = icmp eq i32 %N, 0 br i1 %cmp8, label %for.cond.cleanup, label %for.body.preheader diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll index 93119eac2d564..eb98b85eafc90 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll @@ -411,12 +411,10 @@ for.cond.cleanup: ; preds = %middle.block, %entr define dso_local arm_aapcs_vfpcc i32 @two_loops_mul_add_v4i32(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %N) local_unnamed_addr { ; CHECK-LABEL: two_loops_mul_add_v4i32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: itt eq -; CHECK-NEXT: moveq r0, #0 -; CHECK-NEXT: bxeq lr -; CHECK-NEXT: .LBB6_1: @ %vector.ph ; CHECK-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: beq .LBB6_8 +; CHECK-NEXT: @ %bb.1: @ %vector.ph ; CHECK-NEXT: adds r3, r2, #3 ; CHECK-NEXT: vmov.i32 q1, #0x0 ; CHECK-NEXT: bic r3, r3, #3 @@ -463,10 +461,12 @@ define dso_local arm_aapcs_vfpcc i32 @two_loops_mul_add_v4i32(i8* nocapture read ; CHECK-NEXT: @ %bb.6: @ %middle.block44 ; CHECK-NEXT: vpsel q0, q0, q1 ; CHECK-NEXT: vaddv.u32 r12, q0 -; CHECK-NEXT: .LBB6_7: -; CHECK-NEXT: pop.w {r4, r5, r6, r7, lr} +; CHECK-NEXT: .LBB6_7: @ %for.cond.cleanup7 ; CHECK-NEXT: mov r0, r12 -; CHECK-NEXT: bx lr +; CHECK-NEXT: pop {r4, r5, r6, r7, pc} +; CHECK-NEXT: .LBB6_8: +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: pop {r4, r5, r6, r7, pc} entry: %cmp35 = icmp eq i32 %N, 0 br i1 %cmp35, label %for.cond.cleanup7, label %vector.ph diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/sibling-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/sibling-loops.ll index 1f3a43923db61..caf7a339805fc 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/sibling-loops.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/sibling-loops.ll @@ -4,11 +4,10 @@ define arm_aapcs_vfpcc void @test(ptr noalias nocapture readonly %off, ptr noalias nocapture %data, ptr noalias nocapture %dst, i32 %n) { ; CHECK-LABEL: test: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: cmp r3, #1 -; CHECK-NEXT: it lt -; CHECK-NEXT: bxlt lr -; CHECK-NEXT: .LBB0_1: @ %for.cond1.preheader.us.preheader ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: cmp r3, #1 +; CHECK-NEXT: blt .LBB0_7 +; CHECK-NEXT: @ %bb.1: @ %for.cond1.preheader.us.preheader ; CHECK-NEXT: mov r8, r3 ; CHECK-NEXT: lsl.w r12, r3, #1 ; CHECK-NEXT: movs r3, #0 @@ -48,9 +47,8 @@ define arm_aapcs_vfpcc void @test(ptr noalias nocapture readonly %off, ptr noali ; CHECK-NEXT: add r4, r12 ; CHECK-NEXT: cmp r3, r8 ; CHECK-NEXT: bne .LBB0_2 -; CHECK-NEXT: @ %bb.7: -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: bx lr +; CHECK-NEXT: .LBB0_7: @ %for.cond.cleanup +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} entry: %cmp252 = icmp sgt i32 %n, 0 br i1 %cmp252, label %for.cond1.preheader.us, label %for.cond.cleanup diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll index 3b42ee36e7c2e..fc58873f9857b 100644 --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/while-loops.ll @@ -53,12 +53,10 @@ if.end: ; preds = %do.body, %entry define void @nested(ptr nocapture readonly %x, ptr nocapture readnone %y, ptr nocapture %z, i32 %m, i32 %n) { ; CHECK-LABEL: nested: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: it eq -; CHECK-NEXT: bxeq lr -; CHECK-NEXT: .LBB1_1: @ %for.body.preheader ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: cbz r3, .LBB1_8 +; CHECK-NEXT: @ %bb.1: @ %for.body.preheader ; CHECK-NEXT: ldr.w r12, [sp, #24] ; CHECK-NEXT: movs r1, #0 ; CHECK-NEXT: b .LBB1_4 @@ -93,9 +91,8 @@ define void @nested(ptr nocapture readonly %x, ptr nocapture readnone %y, ptr no ; CHECK-NEXT: sub.w r12, r12, r5 ; CHECK-NEXT: mov r0, r8 ; CHECK-NEXT: b .LBB1_3 -; CHECK-NEXT: .LBB1_8: -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: bx lr +; CHECK-NEXT: .LBB1_8: @ %for.cond.cleanup +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} entry: %cmp20.not = icmp eq i32 %m, 0 br i1 %cmp20.not, label %for.cond.cleanup, label %for.body diff --git a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll index b7b19a477ab0f..6228d616b5842 100644 --- a/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-float32regloops.ll @@ -981,13 +981,6 @@ if.end61: ; preds = %if.then59, %while.e define void @fir(ptr nocapture readonly %S, ptr nocapture readonly %pSrc, ptr nocapture %pDst, i32 %blockSize) { ; CHECK-LABEL: fir: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: cmp r3, #8 -; CHECK-NEXT: blo.w .LBB16_13 -; CHECK-NEXT: @ %bb.1: @ %if.then -; CHECK-NEXT: lsrs.w r12, r3, #2 -; CHECK-NEXT: it eq -; CHECK-NEXT: bxeq lr -; CHECK-NEXT: .LBB16_2: @ %while.body.lr.ph ; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-NEXT: .pad #4 @@ -996,6 +989,12 @@ define void @fir(ptr nocapture readonly %S, ptr nocapture readonly %pSrc, ptr no ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: .pad #32 ; CHECK-NEXT: sub sp, #32 +; CHECK-NEXT: cmp r3, #8 +; CHECK-NEXT: blo.w .LBB16_12 +; CHECK-NEXT: @ %bb.1: @ %if.then +; CHECK-NEXT: lsrs.w r12, r3, #2 +; CHECK-NEXT: beq.w .LBB16_12 +; CHECK-NEXT: @ %bb.2: @ %while.body.lr.ph ; CHECK-NEXT: ldrh r6, [r0] ; CHECK-NEXT: movs r5, #1 ; CHECK-NEXT: ldrd r4, r10, [r0, #4] @@ -1107,13 +1106,11 @@ define void @fir(ptr nocapture readonly %S, ptr nocapture readonly %pSrc, ptr no ; CHECK-NEXT: ldr r0, [sp, #20] @ 4-byte Reload ; CHECK-NEXT: add.w r4, r4, r0, lsl #2 ; CHECK-NEXT: b .LBB16_4 -; CHECK-NEXT: .LBB16_12: +; CHECK-NEXT: .LBB16_12: @ %if.end ; CHECK-NEXT: add sp, #32 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: .LBB16_13: @ %if.end -; CHECK-NEXT: bx lr +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} entry: %pState1 = getelementptr inbounds %struct.arm_fir_instance_f32, ptr %S, i32 0, i32 1 %i = load ptr, ptr %pState1, align 4 diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll b/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll index 0335d24c0a782..24f1831a3f07c 100644 --- a/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll +++ b/llvm/test/CodeGen/Thumb2/mve-gather-increment.ll @@ -290,12 +290,12 @@ end: define arm_aapcs_vfpcc void @gather_inc_v4i32_simple(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n) { ; CHECK-LABEL: gather_inc_v4i32_simple: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, lr} +; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: it lt -; CHECK-NEXT: bxlt lr +; CHECK-NEXT: poplt {r4, pc} ; CHECK-NEXT: .LBB8_1: @ %vector.ph.preheader -; CHECK-NEXT: .save {r4, lr} -; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: bic r12, r2, #3 ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: sub.w lr, r12, #4 @@ -319,9 +319,8 @@ define arm_aapcs_vfpcc void @gather_inc_v4i32_simple(ptr noalias nocapture reado ; CHECK-NEXT: @ in Loop: Header=BB8_2 Depth=1 ; CHECK-NEXT: cmp r12, r2 ; CHECK-NEXT: bne .LBB8_2 -; CHECK-NEXT: @ %bb.5: -; CHECK-NEXT: pop.w {r4, lr} -; CHECK-NEXT: bx lr +; CHECK-NEXT: @ %bb.5: @ %for.cond.cleanup +; CHECK-NEXT: pop {r4, pc} ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.6: ; CHECK-NEXT: .LCPI8_0: @@ -360,14 +359,13 @@ for.cond.cleanup: ; preds = %for.body, %middle.b define arm_aapcs_vfpcc void @gather_inc_v4i32_complex(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n) { ; CHECK-LABEL: gather_inc_v4i32_complex: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: cmp r2, #1 -; CHECK-NEXT: it lt -; CHECK-NEXT: bxlt lr -; CHECK-NEXT: .LBB9_1: @ %vector.ph.preheader ; CHECK-NEXT: .save {r4, r5, r7, lr} ; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: cmp r2, #1 +; CHECK-NEXT: blt .LBB9_5 +; CHECK-NEXT: @ %bb.1: @ %vector.ph.preheader ; CHECK-NEXT: bic r12, r2, #3 ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: sub.w lr, r12, #4 @@ -403,10 +401,9 @@ define arm_aapcs_vfpcc void @gather_inc_v4i32_complex(ptr noalias nocapture read ; CHECK-NEXT: @ in Loop: Header=BB9_2 Depth=1 ; CHECK-NEXT: cmp r12, r2 ; CHECK-NEXT: bne .LBB9_2 -; CHECK-NEXT: @ %bb.5: +; CHECK-NEXT: .LBB9_5: @ %for.cond.cleanup ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: pop.w {r4, r5, r7, lr} -; CHECK-NEXT: bx lr +; CHECK-NEXT: pop {r4, r5, r7, pc} ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.6: ; CHECK-NEXT: .LCPI9_0: @@ -464,12 +461,12 @@ for.cond.cleanup: ; preds = %for.body, %middle.b define arm_aapcs_vfpcc void @gather_inc_v4i32_large(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n) { ; CHECK-LABEL: gather_inc_v4i32_large: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, lr} +; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: it lt -; CHECK-NEXT: bxlt lr +; CHECK-NEXT: poplt {r4, pc} ; CHECK-NEXT: .LBB10_1: @ %vector.ph.preheader -; CHECK-NEXT: .save {r4, lr} -; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: bic r12, r2, #3 ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: sub.w lr, r12, #4 @@ -493,9 +490,8 @@ define arm_aapcs_vfpcc void @gather_inc_v4i32_large(ptr noalias nocapture readon ; CHECK-NEXT: @ in Loop: Header=BB10_2 Depth=1 ; CHECK-NEXT: cmp r12, r2 ; CHECK-NEXT: bne .LBB10_2 -; CHECK-NEXT: @ %bb.5: -; CHECK-NEXT: pop.w {r4, lr} -; CHECK-NEXT: bx lr +; CHECK-NEXT: @ %bb.5: @ %for.cond.cleanup +; CHECK-NEXT: pop {r4, pc} ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.6: ; CHECK-NEXT: .LCPI10_0: diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-tailpred.ll b/llvm/test/CodeGen/Thumb2/mve-gather-tailpred.ll index ea186cd6ed2d4..9093b9af00656 100644 --- a/llvm/test/CodeGen/Thumb2/mve-gather-tailpred.ll +++ b/llvm/test/CodeGen/Thumb2/mve-gather-tailpred.ll @@ -4,12 +4,12 @@ define arm_aapcs_vfpcc void @gather_inc_v4i32_simple(ptr noalias nocapture readonly %data, ptr noalias nocapture %dst, i32 %n) { ; CHECK-LABEL: gather_inc_v4i32_simple: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, lr} +; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: cmp r2, #1 ; CHECK-NEXT: it lt -; CHECK-NEXT: bxlt lr +; CHECK-NEXT: poplt {r4, pc} ; CHECK-NEXT: .LBB0_1: @ %vector.ph.preheader -; CHECK-NEXT: .save {r4, lr} -; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: bic r12, r2, #3 ; CHECK-NEXT: movs r3, #1 ; CHECK-NEXT: sub.w lr, r12, #4 @@ -33,9 +33,8 @@ define arm_aapcs_vfpcc void @gather_inc_v4i32_simple(ptr noalias nocapture reado ; CHECK-NEXT: @ in Loop: Header=BB0_2 Depth=1 ; CHECK-NEXT: cmp r12, r2 ; CHECK-NEXT: bne .LBB0_2 -; CHECK-NEXT: @ %bb.5: -; CHECK-NEXT: pop.w {r4, lr} -; CHECK-NEXT: bx lr +; CHECK-NEXT: @ %bb.5: @ %for.cond.cleanup +; CHECK-NEXT: pop {r4, pc} ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.6: ; CHECK-NEXT: .LCPI0_0: diff --git a/llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll b/llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll index da59cb259db61..5f3a12711dc0f 100644 --- a/llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll +++ b/llvm/test/CodeGen/Thumb2/mve-memtp-loop.ll @@ -211,12 +211,12 @@ entry: define void @test11(ptr nocapture %x, ptr nocapture %y, i32 %n) { ; CHECK-LABEL: test11: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, lr} +; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: cmp.w r2, #-1 ; CHECK-NEXT: it gt -; CHECK-NEXT: bxgt lr +; CHECK-NEXT: popgt {r4, pc} ; CHECK-NEXT: .LBB10_1: @ %prehead -; CHECK-NEXT: .save {r4, lr} -; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: mov r12, r1 ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: wlstp.8 lr, r2, .LBB10_3 @@ -230,9 +230,8 @@ define void @test11(ptr nocapture %x, ptr nocapture %y, i32 %n) { ; CHECK-NEXT: subs r2, #2 ; CHECK-NEXT: strb r3, [r1], #1 ; CHECK-NEXT: bne .LBB10_3 -; CHECK-NEXT: @ %bb.4: -; CHECK-NEXT: pop.w {r4, lr} -; CHECK-NEXT: bx lr +; CHECK-NEXT: @ %bb.4: @ %for.cond.cleanup +; CHECK-NEXT: pop {r4, pc} entry: %cmp6 = icmp slt i32 %n, 0 br i1 %cmp6, label %prehead, label %for.cond.cleanup @@ -441,12 +440,12 @@ declare void @other() define void @multilooped_exit(i32 %b) { ; CHECK-LABEL: multilooped_exit: ; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, lr} +; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: cmp r0, #1 ; CHECK-NEXT: it lt -; CHECK-NEXT: bxlt lr +; CHECK-NEXT: poplt {r4, pc} ; CHECK-NEXT: .LBB18_1: @ %loop.preheader -; CHECK-NEXT: .save {r4, lr} -; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: mov.w r4, #-1 ; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: b .LBB18_3 @@ -499,9 +498,8 @@ define void @multilooped_exit(i32 %b) { ; CHECK-NEXT: vstrb.8 q0, [r3], #16 ; CHECK-NEXT: letp lr, .LBB18_11 ; CHECK-NEXT: b .LBB18_2 -; CHECK-NEXT: .LBB18_12: -; CHECK-NEXT: pop.w {r4, lr} -; CHECK-NEXT: bx lr +; CHECK-NEXT: .LBB18_12: @ %exit +; CHECK-NEXT: pop {r4, pc} entry: %cmp8 = icmp sgt i32 %b, 0 br i1 %cmp8, label %loop, label %exit diff --git a/llvm/test/CodeGen/Thumb2/mve-scatter-increment.ll b/llvm/test/CodeGen/Thumb2/mve-scatter-increment.ll index 3a14e650bd53a..94397f0ae587b 100644 --- a/llvm/test/CodeGen/Thumb2/mve-scatter-increment.ll +++ b/llvm/test/CodeGen/Thumb2/mve-scatter-increment.ll @@ -127,16 +127,15 @@ define arm_aapcs_vfpcc void @scatter_inc_mini_16i8(<16 x i8> %data, ptr %dst, <1 define arm_aapcs_vfpcc void @scatter_inc_v4i32_complex(<4 x i32> %data1, <4 x i32> %data2, <4 x i32> %data3, ptr %dst, i32 %n) { ; CHECK-LABEL: scatter_inc_v4i32_complex: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: cmp r1, #1 -; CHECK-NEXT: it lt -; CHECK-NEXT: bxlt lr -; CHECK-NEXT: .LBB3_1: @ %vector.ph.preheader ; CHECK-NEXT: .save {r4, lr} ; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: .pad #16 ; CHECK-NEXT: sub sp, #16 +; CHECK-NEXT: cmp r1, #1 +; CHECK-NEXT: blt .LBB3_5 +; CHECK-NEXT: @ %bb.1: @ %vector.ph.preheader ; CHECK-NEXT: adr r4, .LCPI3_2 ; CHECK-NEXT: bic r2, r1, #3 ; CHECK-NEXT: vldrw.u32 q3, [r4] @@ -169,11 +168,10 @@ define arm_aapcs_vfpcc void @scatter_inc_v4i32_complex(<4 x i32> %data1, <4 x i3 ; CHECK-NEXT: @ in Loop: Header=BB3_2 Depth=1 ; CHECK-NEXT: cmp r2, r1 ; CHECK-NEXT: bne .LBB3_2 -; CHECK-NEXT: @ %bb.5: +; CHECK-NEXT: .LBB3_5: @ %for.cond.cleanup ; CHECK-NEXT: add sp, #16 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: pop.w {r4, lr} -; CHECK-NEXT: bx lr +; CHECK-NEXT: pop {r4, pc} ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.6: ; CHECK-NEXT: .LCPI3_0: diff --git a/llvm/test/CodeGen/Thumb2/mve-tailpred-nonzerostart.ll b/llvm/test/CodeGen/Thumb2/mve-tailpred-nonzerostart.ll index 42a00b61b4183..85425db1eb6c8 100644 --- a/llvm/test/CodeGen/Thumb2/mve-tailpred-nonzerostart.ll +++ b/llvm/test/CodeGen/Thumb2/mve-tailpred-nonzerostart.ll @@ -58,12 +58,11 @@ for.cond.cleanup: ; preds = %vector.body, %entry define arm_aapcs_vfpcc void @start11(ptr nocapture readonly %x, ptr nocapture readonly %y, ptr noalias nocapture %z, float %a, i32 %n) { ; CHECK-LABEL: start11: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: cmp r3, #1 -; CHECK-NEXT: it lt -; CHECK-NEXT: bxlt lr -; CHECK-NEXT: .LBB1_1: @ %vector.ph ; CHECK-NEXT: .save {r4, r5, r7, lr} ; CHECK-NEXT: push {r4, r5, r7, lr} +; CHECK-NEXT: cmp r3, #1 +; CHECK-NEXT: blt .LBB1_3 +; CHECK-NEXT: @ %bb.1: @ %vector.ph ; CHECK-NEXT: vmov r12, s0 ; CHECK-NEXT: adds r4, r3, #3 ; CHECK-NEXT: adr r5, .LCPI1_0 @@ -86,9 +85,8 @@ define arm_aapcs_vfpcc void @start11(ptr nocapture readonly %x, ptr nocapture re ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrwt.32 q3, [r2], #16 ; CHECK-NEXT: bne .LBB1_2 -; CHECK-NEXT: @ %bb.3: -; CHECK-NEXT: pop.w {r4, r5, r7, lr} -; CHECK-NEXT: bx lr +; CHECK-NEXT: .LBB1_3: @ %for.cond.cleanup +; CHECK-NEXT: pop {r4, r5, r7, pc} ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.4: ; CHECK-NEXT: .LCPI1_0: diff --git a/llvm/test/CodeGen/Thumb2/mve-vmull-loop.ll b/llvm/test/CodeGen/Thumb2/mve-vmull-loop.ll index 0a26d9920981b..da0cd57d86dbb 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vmull-loop.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vmull-loop.ll @@ -4,13 +4,11 @@ define arm_aapcs_vfpcc void @test32(ptr noalias nocapture readonly %x, ptr noalias nocapture readonly %y, ptr nocapture %z, i32 %n) { ; CHECK-LABEL: test32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: cmp r3, #1 -; CHECK-NEXT: it lt -; CHECK-NEXT: bxlt lr -; CHECK-NEXT: .LBB0_1: @ %vector.body.preheader ; CHECK-NEXT: .save {r4, r5, r7, lr} ; CHECK-NEXT: push {r4, r5, r7, lr} -; CHECK-NEXT: .LBB0_2: @ %vector.body +; CHECK-NEXT: cmp r3, #1 +; CHECK-NEXT: blt .LBB0_2 +; CHECK-NEXT: .LBB0_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: vldrw.u32 q1, [r1], #16 @@ -28,10 +26,9 @@ define arm_aapcs_vfpcc void @test32(ptr noalias nocapture readonly %x, ptr noali ; CHECK-NEXT: lsrl r4, r5, #31 ; CHECK-NEXT: vmov q2[3], q2[1], r4, r12 ; CHECK-NEXT: vstrb.8 q2, [r2], #16 -; CHECK-NEXT: bne .LBB0_2 -; CHECK-NEXT: @ %bb.3: -; CHECK-NEXT: pop.w {r4, r5, r7, lr} -; CHECK-NEXT: bx lr +; CHECK-NEXT: bne .LBB0_1 +; CHECK-NEXT: .LBB0_2: @ %for.cond.cleanup +; CHECK-NEXT: pop {r4, r5, r7, pc} entry: %0 = and i32 %n, 3 %cmp = icmp eq i32 %0, 0 diff --git a/llvm/test/CodeGen/X86/negative-stride-fptosi-user.ll b/llvm/test/CodeGen/X86/negative-stride-fptosi-user.ll index d0d46b5f11836..e21d4de178719 100644 --- a/llvm/test/CodeGen/X86/negative-stride-fptosi-user.ll +++ b/llvm/test/CodeGen/X86/negative-stride-fptosi-user.ll @@ -9,14 +9,12 @@ define void @foo(i32 %N) nounwind { ; CHECK-LABEL: foo: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: js .LBB0_1 -; CHECK-NEXT: # %bb.4: # %return -; CHECK-NEXT: retq -; CHECK-NEXT: .LBB0_1: # %bb.preheader ; CHECK-NEXT: pushq %rbp ; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: pushq %rax +; CHECK-NEXT: testl %edi, %edi +; CHECK-NEXT: jns .LBB0_3 +; CHECK-NEXT: # %bb.1: # %bb.preheader ; CHECK-NEXT: movl %edi, %ebx ; CHECK-NEXT: xorl %ebp, %ebp ; CHECK-NEXT: .p2align 4, 0x90 @@ -28,7 +26,7 @@ define void @foo(i32 %N) nounwind { ; CHECK-NEXT: decl %ebp ; CHECK-NEXT: cmpl %ebp, %ebx ; CHECK-NEXT: jne .LBB0_2 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: .LBB0_3: # %return ; CHECK-NEXT: addq $8, %rsp ; CHECK-NEXT: popq %rbx ; CHECK-NEXT: popq %rbp diff --git a/llvm/test/CodeGen/X86/pr44412.ll b/llvm/test/CodeGen/X86/pr44412.ll index 67579a5bb7c52..6c33666fb5c3a 100644 --- a/llvm/test/CodeGen/X86/pr44412.ll +++ b/llvm/test/CodeGen/X86/pr44412.ll @@ -4,10 +4,10 @@ define void @bar(i32 %0, i32 %1) nounwind { ; CHECK-LABEL: bar: ; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: je .LBB0_4 +; CHECK-NEXT: je .LBB0_3 ; CHECK-NEXT: # %bb.1: # %.preheader -; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: movl %edi, %ebx ; CHECK-NEXT: decl %ebx ; CHECK-NEXT: .p2align 4, 0x90 @@ -16,9 +16,8 @@ define void @bar(i32 %0, i32 %1) nounwind { ; CHECK-NEXT: callq foo@PLT ; CHECK-NEXT: addl $-1, %ebx ; CHECK-NEXT: jb .LBB0_2 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: .LBB0_3: ; CHECK-NEXT: popq %rbx -; CHECK-NEXT: .LBB0_4: ; CHECK-NEXT: retq %3 = icmp eq i32 %0, 0 br i1 %3, label %8, label %4 @@ -37,10 +36,10 @@ define void @bar(i32 %0, i32 %1) nounwind { define void @baz(i32 %0, i32 %1) nounwind { ; CHECK-LABEL: baz: ; CHECK: # %bb.0: +; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: testl %edi, %edi -; CHECK-NEXT: je .LBB1_4 +; CHECK-NEXT: je .LBB1_3 ; CHECK-NEXT: # %bb.1: # %.preheader -; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: movl %edi, %ebx ; CHECK-NEXT: decl %ebx ; CHECK-NEXT: .p2align 4, 0x90 @@ -49,9 +48,8 @@ define void @baz(i32 %0, i32 %1) nounwind { ; CHECK-NEXT: callq foo@PLT ; CHECK-NEXT: addl $-1, %ebx ; CHECK-NEXT: jae .LBB1_2 -; CHECK-NEXT: # %bb.3: +; CHECK-NEXT: .LBB1_3: ; CHECK-NEXT: popq %rbx -; CHECK-NEXT: .LBB1_4: ; CHECK-NEXT: retq %3 = icmp eq i32 %0, 0 br i1 %3, label %8, label %4 diff --git a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll index 379174fa04dda..ec4a12eadb94e 100644 --- a/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll +++ b/llvm/test/CodeGen/X86/x86-shrink-wrapping.ll @@ -639,11 +639,11 @@ declare hidden fastcc ptr @find_temp_slot_from_address(ptr readonly) define void @useLEA(ptr readonly %x) { ; ENABLE-LABEL: useLEA: ; ENABLE: ## %bb.0: ## %entry -; ENABLE-NEXT: testq %rdi, %rdi -; ENABLE-NEXT: je LBB8_8 -; ENABLE-NEXT: ## %bb.1: ## %if.end ; ENABLE-NEXT: pushq %rax ; ENABLE-NEXT: .cfi_def_cfa_offset 16 +; ENABLE-NEXT: testq %rdi, %rdi +; ENABLE-NEXT: je LBB8_7 +; ENABLE-NEXT: ## %bb.1: ## %if.end ; ENABLE-NEXT: cmpw $66, (%rdi) ; ENABLE-NEXT: jne LBB8_7 ; ENABLE-NEXT: ## %bb.2: ## %lor.lhs.false @@ -652,13 +652,12 @@ define void @useLEA(ptr readonly %x) { ; ENABLE-NEXT: leal -54(%rax), %ecx ; ENABLE-NEXT: cmpl $14, %ecx ; ENABLE-NEXT: ja LBB8_3 -; ENABLE-NEXT: ## %bb.9: ## %lor.lhs.false +; ENABLE-NEXT: ## %bb.8: ## %lor.lhs.false ; ENABLE-NEXT: movl $24599, %edx ## imm = 0x6017 ; ENABLE-NEXT: btl %ecx, %edx ; ENABLE-NEXT: jae LBB8_3 -; ENABLE-NEXT: LBB8_7: -; ENABLE-NEXT: addq $8, %rsp -; ENABLE-NEXT: LBB8_8: ## %cleanup +; ENABLE-NEXT: LBB8_7: ## %cleanup +; ENABLE-NEXT: popq %rax ; ENABLE-NEXT: retq ; ENABLE-NEXT: LBB8_3: ## %lor.lhs.false ; ENABLE-NEXT: cmpl $134, %eax @@ -672,7 +671,8 @@ define void @useLEA(ptr readonly %x) { ; ENABLE-NEXT: je LBB8_7 ; ENABLE-NEXT: ## %bb.6: ## %if.then.60 ; ENABLE-NEXT: movb $1, 57(%rax) -; ENABLE-NEXT: jmp LBB8_7 +; ENABLE-NEXT: popq %rax +; ENABLE-NEXT: retq ; ; DISABLE-LABEL: useLEA: ; DISABLE: ## %bb.0: ## %entry diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll index 63a3c725ae89e..fa1c208ffbd77 100644 --- a/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll +++ b/llvm/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll @@ -182,12 +182,12 @@ exit: define void @extrastride(i8* nocapture %main, i32 %main_stride, i32* nocapture %res, i32 %x, i32 %y, i32 %z) nounwind { ; X64-LABEL: extrastride: ; X64: # %bb.0: # %entry +; X64-NEXT: pushq %rbx ; X64-NEXT: # kill: def $ecx killed $ecx def $rcx ; X64-NEXT: # kill: def $esi killed $esi def $rsi ; X64-NEXT: testl %r9d, %r9d -; X64-NEXT: je .LBB2_4 +; X64-NEXT: je .LBB2_3 ; X64-NEXT: # %bb.1: # %for.body.lr.ph -; X64-NEXT: pushq %rbx ; X64-NEXT: leal (%rsi,%rsi), %r10d ; X64-NEXT: leal (%rsi,%rsi,2), %r11d ; X64-NEXT: addl %esi, %ecx @@ -213,9 +213,8 @@ define void @extrastride(i8* nocapture %main, i32 %main_stride, i32* nocapture % ; X64-NEXT: addq %r8, %rdx ; X64-NEXT: decl %r9d ; X64-NEXT: jne .LBB2_2 -; X64-NEXT: # %bb.3: +; X64-NEXT: .LBB2_3: # %for.end ; X64-NEXT: popq %rbx -; X64-NEXT: .LBB2_4: # %for.end ; X64-NEXT: retq ; ; X32-LABEL: extrastride: