diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h index 5df11a45b4889..5e35ed64ec9cc 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPU.h +++ b/llvm/lib/Target/AMDGPU/AMDGPU.h @@ -45,6 +45,7 @@ FunctionPass *createSIWholeQuadModeLegacyPass(); FunctionPass *createSIFixControlFlowLiveIntervalsPass(); FunctionPass *createSIOptimizeExecMaskingPreRAPass(); FunctionPass *createSIOptimizeVGPRLiveRangeLegacyPass(); +FunctionPass *createAMDGPUNextUseAnalysisPass(); FunctionPass *createSIFixSGPRCopiesLegacyPass(); FunctionPass *createLowerWWMCopiesPass(); FunctionPass *createSIMemoryLegalizerPass(); @@ -191,6 +192,9 @@ extern char &SIFixSGPRCopiesLegacyID; void initializeSIFixVGPRCopiesLegacyPass(PassRegistry &); extern char &SIFixVGPRCopiesID; +void initializeAMDGPUNextUseAnalysisPassPass(PassRegistry &); +extern char &AMDGPUNextUseAnalysisID; + void initializeSILowerWWMCopiesLegacyPass(PassRegistry &); extern char &SILowerWWMCopiesLegacyID; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUNextUseAnalysis.cpp b/llvm/lib/Target/AMDGPU/AMDGPUNextUseAnalysis.cpp new file mode 100644 index 0000000000000..a413b097b961a --- /dev/null +++ b/llvm/lib/Target/AMDGPU/AMDGPUNextUseAnalysis.cpp @@ -0,0 +1,588 @@ +//===---------------------- AMDGPUNextUseAnalysis.cpp --------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "AMDGPUNextUseAnalysis.h" +#include "AMDGPU.h" +#include "GCNSubtarget.h" +#include "llvm/InitializePasses.h" +#include +#include +#include + +using namespace llvm; + +#define DEBUG_TYPE "amdgpu-next-use-analysis" + +static cl::opt DumpNextUseDistance("dump-next-use-distance", + cl::init(false), cl::Hidden); + +bool AMDGPUNextUseAnalysis::isBackedge(MachineBasicBlock *From, + MachineBasicBlock *To) const { + if (!From->isSuccessor(To)) + return false; + MachineLoop *Loop1 = MLI->getLoopFor(From); + MachineLoop *Loop2 = MLI->getLoopFor(To); + if (!Loop1 || !Loop2 || Loop1 != Loop2) + return false; + MachineBasicBlock *LoopHeader = Loop1->getHeader(); + if (To != LoopHeader) + return false; + SmallVector Latches; + Loop1->getLoopLatches(Latches); + auto It = llvm::find(Latches, From); + return It != Latches.end(); +} + +// Calculate the shortest distance between two blocks using Dijkstra algorithm. 
+double AMDGPUNextUseAnalysis::getShortestPath(MachineBasicBlock *FromMBB, + MachineBasicBlock *ToMBB) { + assert(FromMBB != ToMBB && "The basic blocks should be different.\n"); + DenseSet Visited; + struct Data { + MachineBasicBlock *BestPred = nullptr; + double ShortestDistance = std::numeric_limits::max(); + }; + DenseMap MBBData; + + auto Cmp = [&MBBData](MachineBasicBlock *MBB1, MachineBasicBlock *MBB2) { + return MBBData[MBB1].ShortestDistance > MBBData[MBB2].ShortestDistance; + }; + std::priority_queue, + decltype(Cmp)> + Worklist(Cmp); + + Worklist.push(FromMBB); + MBBData[FromMBB] = {nullptr, 0.0}; + + while (!Worklist.empty()) { + MachineBasicBlock *CurMBB = Worklist.top(); + Worklist.pop(); + + if (!Visited.insert(CurMBB).second) + continue; + + if (CurMBB == ToMBB) { + auto *Pred = MBBData[CurMBB].BestPred; + return MBBData[Pred].ShortestDistance - MBBData[FromMBB].ShortestDistance; + } + + auto Pair = MBBData.try_emplace( + CurMBB, Data{nullptr, std::numeric_limits::max()}); + double CurrMBBDist = Pair.first->second.ShortestDistance; + + for (MachineBasicBlock *Succ : CurMBB->successors()) { + if (isBackedge(CurMBB, Succ)) + continue; + + auto GetEffectiveLoopDepth = [&](MachineBasicBlock *BB) -> double { + MachineLoop *LoopBB = MLI->getLoopFor(BB); + double LoopDepth = 0.0; + for (MachineLoop *TmpLoop = LoopBB, + *End = LoopBB->getOutermostLoop()->getParentLoop(); + TmpLoop != End; TmpLoop = TmpLoop->getParentLoop()) { + if (TmpLoop->contains(ToMBB)) + continue; + LoopDepth++; + } + return LoopDepth; + }; + + auto GetLoopWeight = [&](MachineBasicBlock *BB) -> double { + MachineLoop *LoopBB = MLI->getLoopFor(BB); + MachineLoop *LoopTo = MLI->getLoopFor(ToMBB); + if (!LoopBB && !LoopTo) + return 0.0; + + if (LoopBB && LoopTo && + (LoopTo->contains(LoopBB) && (LoopTo != LoopBB))) + return std::pow(LoopWeight, + static_cast(MLI->getLoopDepth(BB) - + MLI->getLoopDepth(ToMBB))); + + if ((LoopBB && LoopTo && LoopBB->contains(LoopTo))) + return 1.0; + + if 
((!LoopTo && LoopBB) || + (LoopBB && LoopTo && !LoopTo->contains(LoopBB))) + return std::pow(LoopWeight, GetEffectiveLoopDepth(BB)); + + return 0.0; + }; + + auto GetWeightedSize = [&](MachineBasicBlock *BB) -> double { + double LoopWeight = GetLoopWeight(BB); + if (LoopWeight != 0.0) + return static_cast(BB->size()) * LoopWeight; + return static_cast(BB->size()); + }; + double NewSuccDist = CurrMBBDist + GetWeightedSize(Succ); + + auto &[SuccPred, SuccDist] = MBBData[Succ]; + if (NewSuccDist < SuccDist) { + // We found a better path to Succ, update best predecessor and distance + SuccPred = CurMBB; + SuccDist = NewSuccDist; + } + + Worklist.push(Succ); + } + } + return std::numeric_limits::max(); +} + +void AMDGPUNextUseAnalysis::calculateShortestPaths(MachineFunction &MF) { + for (MachineBasicBlock &MBB1 : MF) { + for (MachineBasicBlock &MBB2 : MF) { + if (&MBB1 == &MBB2) + continue; + ShortestPathTable[std::make_pair(&MBB1, &MBB2)] = + getShortestPath(&MBB1, &MBB2); + } + } +} + +double AMDGPUNextUseAnalysis::calculateShortestDistance(MachineInstr *CurMI, + MachineInstr *UseMI) { + MachineBasicBlock *CurMBB = CurMI->getParent(); + MachineBasicBlock *UseMBB = UseMI->getParent(); + + if (CurMBB == UseMBB) + return getInstrId(UseMI) - getInstrId(CurMI); + + double CurMIDistanceToBBEnd = + getInstrId(&*(std::prev(CurMBB->instr_end()))) - getInstrId(CurMI); + double UseDistanceFromBBBegin = getInstrId(&*(UseMI->getIterator())) - + getInstrId(&*(UseMBB->instr_begin())) + 1; + double Dst = getShortestDistanceFromTable(CurMBB, UseMBB); + assert(Dst != std::numeric_limits::max()); + return CurMIDistanceToBBEnd + Dst + UseDistanceFromBBBegin; +} + +std::pair +AMDGPUNextUseAnalysis::getShortestDistanceToExitingLatch( + MachineBasicBlock *CurMBB, MachineLoop *CurLoop) const { + SmallVector Latches; + CurLoop->getLoopLatches(Latches); + double ShortestDistanceToLatch = std::numeric_limits::max(); + MachineBasicBlock *ExitingLatch = nullptr; + + for (MachineBasicBlock *LMBB : 
Latches) { + if (LMBB == CurMBB) + return std::make_pair(0.0, CurMBB); + + double Dst = getShortestDistanceFromTable(CurMBB, LMBB); + if (ShortestDistanceToLatch > Dst) { + ShortestDistanceToLatch = Dst; + ExitingLatch = LMBB; + } + } + return std::make_pair(ShortestDistanceToLatch, ExitingLatch); +} + +std::pair +AMDGPUNextUseAnalysis::getLoopDistanceAndExitingLatch( + MachineBasicBlock *CurMBB) const { + MachineLoop *CurLoop = MLI->getLoopFor(CurMBB); + MachineBasicBlock *LoopHeader = CurLoop->getHeader(); + SmallVector Latches; + CurLoop->getLoopLatches(Latches); + bool IsCurLoopLatch = llvm::any_of( + Latches, [&](MachineBasicBlock *LMBB) { return CurMBB == LMBB; }); + MachineBasicBlock *ExitingLatch = nullptr; + double DistanceToLatch = 0.0; + double TotalDistance = 0.0; + + if (CurLoop->getNumBlocks() == 1) + return std::make_pair(static_cast(CurMBB->size()), CurMBB); + + if (CurMBB == LoopHeader) { + std::tie(DistanceToLatch, ExitingLatch) = + getShortestDistanceToExitingLatch(CurMBB, CurLoop); + TotalDistance = static_cast(LoopHeader->size()) + DistanceToLatch + + static_cast(ExitingLatch->size()); + return std::make_pair(TotalDistance, ExitingLatch); + } + + if (IsCurLoopLatch) { + TotalDistance = static_cast(LoopHeader->size()) + + getShortestDistanceFromTable(LoopHeader, CurMBB) + + static_cast(CurMBB->size()); + return std::make_pair(TotalDistance, CurMBB); + } + + double LoopHeaderToCurMBBDistance = + getShortestDistanceFromTable(LoopHeader, CurMBB); + + std::tie(DistanceToLatch, ExitingLatch) = + getShortestDistanceToExitingLatch(CurMBB, CurLoop); + + TotalDistance = static_cast(LoopHeader->size()) + + LoopHeaderToCurMBBDistance + + static_cast(CurMBB->size()) + DistanceToLatch + + static_cast(ExitingLatch->size()); + return std::make_pair(TotalDistance, ExitingLatch); +} + +// Calculates the overhead of a loop nest for three cases: 1. the use is outside +// of the current loop, but they share the same loop nest 2. 
the use is +// outside of the current loop nest and 3. the use is in a parent loop of the +// current loop nest. +std::pair +AMDGPUNextUseAnalysis::getNestedLoopDistanceAndExitingLatch( + MachineBasicBlock *CurMBB, MachineBasicBlock *UseMBB, + bool IsUseOutsideOfTheCurrentLoopNest, bool IsUseInParentLoop) { + MachineLoop *CurLoop = MLI->getLoopFor(CurMBB); + MachineLoop *UseLoop = MLI->getLoopFor(UseMBB); + + auto GetLoopDistance = + [&](MachineLoop *ML) -> std::pair { + double ShortestDistance = 0.0; + double LoopDistance = 0.0; + MachineBasicBlock *ExitingLatch = nullptr; + double UseLoopDepth = !IsUseOutsideOfTheCurrentLoopNest + ? static_cast(MLI->getLoopDepth(UseMBB)) + : 0.0; + if (ML->getNumBlocks() == 1) { + ShortestDistance = + static_cast(ML->getHeader()->size()) * + std::pow(LoopWeight, + (static_cast(MLI->getLoopDepth(ML->getHeader())) - + UseLoopDepth)); + return std::make_pair(ShortestDistance, ML->getLoopLatch()); + } + std::tie(LoopDistance, ExitingLatch) = + getLoopDistanceAndExitingLatch(ML->getHeader()); + ShortestDistance = LoopDistance * LoopWeight; + return std::make_pair(ShortestDistance, ExitingLatch); + }; + + if (IsUseOutsideOfTheCurrentLoopNest) { + MachineLoop *OutermostLoop = CurLoop->getOutermostLoop(); + if (OutermostLoop->contains(UseLoop)) { + // The CurLoop and the UseLoop are independent and they are in the same + // loop nest. 
+ if (MLI->getLoopDepth(CurMBB) <= MLI->getLoopDepth(UseMBB)) { + return GetLoopDistance(CurLoop); + } else { + assert(CurLoop != OutermostLoop && "The loop cannot be the outermost."); + MachineLoop *OuterLoopOfCurLoop = CurLoop; + while (OutermostLoop != OuterLoopOfCurLoop && + MLI->getLoopDepth(OuterLoopOfCurLoop->getHeader()) != + MLI->getLoopDepth(UseMBB)) { + OuterLoopOfCurLoop = OuterLoopOfCurLoop->getParentLoop(); + } + return GetLoopDistance(OuterLoopOfCurLoop); + } + } else { + // We should take into consideration the whole loop nest in the + // calculation of the distance because we will reach the use after + // executing the whole loop nest. + return GetLoopDistance(OutermostLoop); + } + } else if (IsUseInParentLoop) { + MachineLoop *UseLoopSubLoop = nullptr; + for (MachineLoop *ML : UseLoop->getSubLoopsVector()) { + // All the sub-loops of the UseLoop will be executed before the use. + // Hence, we should take this into consideration in distance calculation. + if (ML->contains(CurLoop)) { + UseLoopSubLoop = ML; + break; + } + } + return GetLoopDistance(UseLoopSubLoop); + } + llvm_unreachable("Failed to calculate the loop distance!"); +} + +double AMDGPUNextUseAnalysis::calculateCurLoopDistance(Register DefReg, + MachineInstr *CurMI, + MachineInstr *UseMI) { + MachineBasicBlock *CurMBB = CurMI->getParent(); + MachineBasicBlock *UseMBB = UseMI->getParent(); + MachineLoop *CurLoop = MLI->getLoopFor(CurMBB); + MachineLoop *UseLoop = MLI->getLoopFor(UseMBB); + double LoopDistance = 0.0; + MachineBasicBlock *ExitingLatch = nullptr; + bool IsUseInParentLoop = CurLoop && UseLoop && + (UseLoop->contains(CurLoop) && (UseLoop != CurLoop)); + + bool IsUseOutsideOfTheCurrentLoopNest = + (!UseLoop && CurLoop) || + (CurLoop && UseLoop && !UseLoop->contains(CurLoop) && + !CurLoop->contains(UseLoop)); + + if (IsUseOutsideOfTheCurrentLoopNest) { + if (CurLoop->getSubLoops().empty() && CurLoop->isOutermost()) { + std::tie(LoopDistance, ExitingLatch) = + 
getLoopDistanceAndExitingLatch(CurMBB); + LoopDistance = LoopDistance * LoopWeight; + } else { + std::tie(LoopDistance, ExitingLatch) = + getNestedLoopDistanceAndExitingLatch(CurMBB, UseMBB, true, false); + } + } else if (IsUseInParentLoop) { + assert(MLI->getLoopDepth(UseMBB) < MLI->getLoopDepth(CurMBB) && + "The loop depth of the current instruction must be bigger than " + "these.\n"); + if (isIncomingValFromBackedge(CurMI, UseMI, DefReg)) + return calculateBackedgeDistance(CurMI, UseMI); + + // Get the loop distance of all the inner loops of UseLoop. + std::tie(LoopDistance, ExitingLatch) = + getNestedLoopDistanceAndExitingLatch(CurMBB, UseMBB, false, true); + } + + double UseDistanceFromBBBegin = getInstrId(&*(UseMI->getIterator())) - + getInstrId(&*(UseMBB->instr_begin())) + 1; + return LoopDistance + getShortestDistanceFromTable(ExitingLatch, UseMBB) + + UseDistanceFromBBBegin; +} + +double AMDGPUNextUseAnalysis::calculateBackedgeDistance(MachineInstr *CurMI, + MachineInstr *UseMI) { + MachineBasicBlock *CurMBB = CurMI->getParent(); + MachineBasicBlock *UseMBB = UseMI->getParent(); + MachineLoop *CurLoop = MLI->getLoopFor(CurMBB); + MachineLoop *UseLoop = MLI->getLoopFor(UseMBB); + assert(UseLoop && "There is no backedge.\n"); + double CurMIDistanceToBBEnd = + getInstrId(&*(std::prev(CurMBB->instr_end()))) - getInstrId(CurMI); + double UseDistanceFromBBBegin = getInstrId(&*(UseMI->getIterator())) - + getInstrId(&*(UseMBB->instr_begin())) + 1; + + if (!CurLoop) + return CurMIDistanceToBBEnd + getShortestDistanceFromTable(CurMBB, UseMBB) + + UseDistanceFromBBBegin; + + if (CurLoop == UseLoop) { + auto [DistanceToLatch, ExitingLatch] = + getShortestDistanceToExitingLatch(CurMBB, CurLoop); + if (ExitingLatch == CurMBB) + return CurMIDistanceToBBEnd + UseDistanceFromBBBegin; + return UseDistanceFromBBBegin + CurMIDistanceToBBEnd + DistanceToLatch + + static_cast(ExitingLatch->size()); + } + + if (!CurLoop->contains(UseLoop) && !UseLoop->contains(CurLoop)) { + auto 
[LoopDistance, ExitingLatch] = getLoopDistanceAndExitingLatch(CurMBB); + return LoopDistance + getShortestDistanceFromTable(ExitingLatch, UseMBB) + + UseDistanceFromBBBegin; + } + + if (!CurLoop->contains(UseLoop)) { + auto [InnerLoopDistance, InnerLoopExitingLatch] = + getNestedLoopDistanceAndExitingLatch(CurMBB, UseMBB, false, true); + auto [DistanceToLatch, ExitingLatch] = + getShortestDistanceToExitingLatch(InnerLoopExitingLatch, UseLoop); + return InnerLoopDistance + DistanceToLatch + + static_cast(ExitingLatch->size()) + UseDistanceFromBBBegin; + } + + llvm_unreachable("The backedge distance has not been calculated!"); +} + +bool AMDGPUNextUseAnalysis::isIncomingValFromBackedge(MachineInstr *CurMI, + MachineInstr *UseMI, + Register DefReg) const { + if (!UseMI->isPHI()) + return false; + + MachineLoop *CurLoop = MLI->getLoopFor(CurMI->getParent()); + MachineLoop *UseLoop = MLI->getLoopFor(UseMI->getParent()); + + if (!UseLoop) + return false; + + if (CurLoop && !UseLoop->contains(CurLoop)) + return false; + + if (UseMI->getParent() != UseLoop->getHeader()) + return false; + + SmallVector Latches; + UseLoop->getLoopLatches(Latches); + + bool IsNotIncomingValFromLatch = false; + bool IsIncomingValFromLatch = false; + auto Ops = UseMI->operands(); + for (auto It = std::next(Ops.begin()), ItE = Ops.end(); It != ItE; + It = std::next(It, 2)) { + auto &RegMO = *It; + auto &MBBMO = *std::next(It); + assert(RegMO.isReg() && "Expected register operand of PHI"); + assert(MBBMO.isMBB() && "Expected MBB operand of PHI"); + if (RegMO.getReg() == DefReg) { + MachineBasicBlock *IncomingBB = MBBMO.getMBB(); + auto It = llvm::find(Latches, IncomingBB); + if (It == Latches.end()) + IsNotIncomingValFromLatch = true; + else + IsIncomingValFromLatch = true; + } + } + return IsIncomingValFromLatch && !IsNotIncomingValFromLatch; +} + +void AMDGPUNextUseAnalysis::dumpShortestPaths() const { + for (const auto &P : ShortestPathTable) { + MachineBasicBlock *From = P.first.first; + 
MachineBasicBlock *To = P.first.second; + double Dist = P.second; + errs() << "From: " << From->getName() << "-> To:" << To->getName() << " = " + << Dist << "\n"; + } +} + +void AMDGPUNextUseAnalysis::printAllDistances(MachineFunction &MF) { + for (MachineBasicBlock &MBB : MF) { + for (MachineInstr &MI : *&MBB) { + for (const MachineOperand &MO : MI.operands()) { + if (!MO.isReg()) + continue; + + Register Reg = MO.getReg(); + if (!MO.isReg()) + continue; + + if (MO.isUse()) + continue; + + if (Reg.isPhysical() || TRI->isAGPR(*MRI, Reg)) + continue; + + std::optional NextUseDistance = getNextUseDistance(Reg); + errs() << "Next-use distance of Register " << printReg(Reg, TRI) + << " = "; + if (NextUseDistance) + errs() << format("%.1f", *NextUseDistance); + else + errs() << "null"; + errs() << "\n"; + } + } + } +} + + +// TODO: Remove it. It is only used for testing. +std::optional +AMDGPUNextUseAnalysis::getNextUseDistance(Register DefReg) { + assert(!DefReg.isPhysical() && !TRI->isAGPR(*MRI, DefReg) && + "Next-use distance is calculated for SGPRs and VGPRs"); + double NextUseDistance = std::numeric_limits::max(); + double CurrentNextUseDistance = std::numeric_limits::max(); + MachineInstr *CurMI = &*MRI->def_instr_begin(DefReg); + MachineBasicBlock *CurMBB = CurMI->getParent(); + MachineLoop *CurLoop = MLI->getLoopFor(CurMBB); + for (auto &UseMI : MRI->use_nodbg_instructions(DefReg)) { + MachineBasicBlock *UseMBB = UseMI.getParent(); + MachineLoop *UseLoop = MLI->getLoopFor(UseMBB); + + bool IsUseOutsideOfTheDefinitionLoop = + (CurLoop && !UseLoop) || + (CurLoop && UseLoop && + ((!UseLoop->contains(CurLoop) && !CurLoop->contains(UseLoop)) || + (UseLoop->contains(CurLoop) && (UseLoop != CurLoop)))); + + if (IsUseOutsideOfTheDefinitionLoop) { + CurrentNextUseDistance = calculateCurLoopDistance(DefReg, CurMI, &UseMI); + } else if (isIncomingValFromBackedge(CurMI, &UseMI, DefReg)) { + CurrentNextUseDistance = calculateBackedgeDistance(CurMI, &UseMI); + } else { + 
CurrentNextUseDistance = calculateShortestDistance(CurMI, &UseMI); + } + + if (CurrentNextUseDistance < NextUseDistance) + NextUseDistance = CurrentNextUseDistance; + } + return NextUseDistance != std::numeric_limits::max() + ? std::optional(NextUseDistance) + : std::nullopt; +} + +std::optional +AMDGPUNextUseAnalysis::getNextUseDistance(Register DefReg, MachineInstr *CurMI, + SmallVector &Uses) { + assert(!DefReg.isPhysical() && !TRI->isAGPR(*MRI, DefReg) && + "Next-use distance is calculated for SGPRs and VGPRs"); + double NextUseDistance = std::numeric_limits::max(); + double CurrentNextUseDistance = std::numeric_limits::max(); + MachineBasicBlock *CurMBB = CurMI->getParent(); + MachineLoop *CurLoop = MLI->getLoopFor(CurMBB); + for (auto *UseMI : Uses) { + MachineBasicBlock *UseMBB = UseMI->getParent(); + MachineLoop *UseLoop = MLI->getLoopFor(UseMBB); + + bool IsUseOutsideOfCurLoop = + (CurLoop && !UseLoop) || + (CurLoop && UseLoop && + ((!UseLoop->contains(CurLoop) && !CurLoop->contains(UseLoop)) || + (UseLoop->contains(CurLoop) && (UseLoop != CurLoop)))); + + if (IsUseOutsideOfCurLoop) { + CurrentNextUseDistance = calculateCurLoopDistance(DefReg, CurMI, UseMI); + } else if (isIncomingValFromBackedge(CurMI, UseMI, DefReg)) { + CurrentNextUseDistance = calculateBackedgeDistance(CurMI, UseMI); + } else { + CurrentNextUseDistance = calculateShortestDistance(CurMI, UseMI); + } + + if (CurrentNextUseDistance < NextUseDistance) + NextUseDistance = CurrentNextUseDistance; + } + return NextUseDistance != std::numeric_limits::max() + ? 
std::optional(NextUseDistance) + : std::nullopt; +} + +bool AMDGPUNextUseAnalysis::run(MachineFunction &MF, + const MachineLoopInfo *MLInfo) { + + const GCNSubtarget &ST = MF.getSubtarget(); + TII = ST.getInstrInfo(); + TRI = &TII->getRegisterInfo(); + MLI = MLInfo; + MRI = &MF.getRegInfo(); + + for (MachineBasicBlock &BB : MF) { + double Id = 0.0; + for (MachineInstr &MI : BB) { + InstrToId[&MI] = ++Id; + } + } + + calculateShortestPaths(MF); + + if (DumpNextUseDistance) { + MF.print(errs()); + printAllDistances(MF); + } + + return true; +} + +bool AMDGPUNextUseAnalysisPass::runOnMachineFunction(MachineFunction &MF) { + MLI = &getAnalysis().getLI(); + AMDGPUNextUseAnalysis NUA; + return NUA.run(MF, MLI); +} + +char AMDGPUNextUseAnalysisPass::ID = 0; + +INITIALIZE_PASS_BEGIN(AMDGPUNextUseAnalysisPass, DEBUG_TYPE, + "Next Use Analysis", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LiveVariablesWrapperPass) +INITIALIZE_PASS_END(AMDGPUNextUseAnalysisPass, DEBUG_TYPE, "Next Use Analysis", + false, false) + +char &llvm::AMDGPUNextUseAnalysisID = AMDGPUNextUseAnalysisPass::ID; + +FunctionPass *llvm::createAMDGPUNextUseAnalysisPass() { + return new AMDGPUNextUseAnalysisPass(); +} diff --git a/llvm/lib/Target/AMDGPU/AMDGPUNextUseAnalysis.h b/llvm/lib/Target/AMDGPU/AMDGPUNextUseAnalysis.h new file mode 100644 index 0000000000000..25edf06d2afcf --- /dev/null +++ b/llvm/lib/Target/AMDGPU/AMDGPUNextUseAnalysis.h @@ -0,0 +1,161 @@ +//===---------------------- AMDGPUNextUseAnalysis.h ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements Next Use Analysis. 
+// For each register it goes over all uses and returns the estimated distance of +// the nearest use. This will be used for selecting which registers to spill +// before register allocation. +// +// This is based on ideas from the paper: +// "Register Spilling and Live-Range Splitting for SSA-Form Programs" +// Matthias Braun and Sebastian Hack, CC'09 +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUNEXTUSEANALYSIS_H +#define LLVM_LIB_TARGET_AMDGPU_AMDGPUNEXTUSEANALYSIS_H + +#include "SIInstrInfo.h" +#include "SIRegisterInfo.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetRegisterInfo.h" +#include + +using namespace llvm; + +class AMDGPUNextUseAnalysis { + const SIRegisterInfo *TRI = nullptr; + const SIInstrInfo *TII = nullptr; + const MachineLoopInfo *MLI = nullptr; + MachineRegisterInfo *MRI = nullptr; + /// Instruction to instruction-id map. + DenseMap InstrToId; + /// Returns MI's instruction ID. It renumbers (part of) the BB if MI is not + /// found in the map. + double getInstrId(MachineInstr *MI) { + auto It = InstrToId.find(MI); + if (It != InstrToId.end()) + return It->second; + // Renumber the MBB. + // TODO: Renumber from MI onwards. + auto *MBB = MI->getParent(); + double Id = 0.0; + for (auto &MBBMI : *MBB) + InstrToId[&MBBMI] = Id++; + return InstrToId.find(MI)->second; + } + /// [FromMBB, ToMBB] to shortest distance map. + DenseMap, double> + ShortestPathTable; + /// We assume an approximate trip count of 1000 for all loops. + static constexpr const double LoopWeight = 1000.0; + /// Returns the shortest distance from ShortestPathTable. Will crash if + /// {FromMBB,ToMBB} not found. 
+ double getShortestDistanceFromTable(MachineBasicBlock *FromMBB, + MachineBasicBlock *ToMBB) const { + auto It = ShortestPathTable.find({FromMBB, ToMBB}); + assert(It != ShortestPathTable.end() && "Not found in table!"); + return It->second; + } + bool isBackedge(MachineBasicBlock *From, MachineBasicBlock *To) const; + double getShortestPath(MachineBasicBlock *From, MachineBasicBlock *To); + /// Goes over all MBB pairs in \p MF, calculates the shortest path between + /// them and fills in \p ShortestPathTable. + void calculateShortestPaths(MachineFunction &MF); + /// If the path from \p MI to \p UseMI does not cross any loops, then this + /// \returns the shortest instruction distance between them. + double calculateShortestDistance(MachineInstr *MI, MachineInstr *UseMI); + /// \Returns the shortest distance between a given basic block \p CurMBB and + /// its closest exiting latch of \p CurLoop. + std::pair + getShortestDistanceToExitingLatch(MachineBasicBlock *CurMBB, + MachineLoop *CurLoop) const; + /// Helper function for calculating the minimum instruction distance from the + /// outer loop header to the outer loop latch. + std::pair getNestedLoopDistanceAndExitingLatch( + MachineBasicBlock *CurMBB, MachineBasicBlock *UseMBB, + bool IsUseOutsideOfTheCurLoopNest = false, + bool IsUseInParentLoop = false); + /// Given \p CurMI in a loop and \p UseMI outside the loop, this function + /// returns the minimum instruction path between \p CurMI and \p UseMI. + /// Please note that since \p CurMI is in a loop we don't care about the + /// exact position of the instruction in the block because we are making a + /// rough estimate of the dynamic instruction path length, given that the loop + /// iterates multiple times. + double calculateCurLoopDistance(Register DefReg, MachineInstr *CurMI, + MachineInstr *UseMI); + /// \Returns the shortest path distance from \p CurMI to the end of the loop + /// latch plus the distance from the top of the loop header to the PHI use. 
+ double calculateBackedgeDistance(MachineInstr *CurMI, MachineInstr *UseMI); + /// \Returns true if the use of \p DefReg (\p UseMI) is a PHI in the loop + /// header, i.e., DefReg is flowing through the back-edge. + bool isIncomingValFromBackedge(MachineInstr *CurMI, MachineInstr *UseMI, + Register DefReg) const; + + void dumpShortestPaths() const; + + void printAllDistances(MachineFunction &); + + void clearTables() { + InstrToId.clear(); + ShortestPathTable.clear(); + } + +public: + AMDGPUNextUseAnalysis() = default; + + ~AMDGPUNextUseAnalysis() { clearTables(); } + + bool run(MachineFunction &, const MachineLoopInfo *); + + /// \Returns the next-use distance for \p DefReg. + std::optional getNextUseDistance(Register DefReg); + + std::optional getNextUseDistance(Register DefReg, MachineInstr *CurMI, + SmallVector &Uses); + + /// Helper function that finds the shortest instruction path in \p CurMBB's + /// loop that includes \p CurMBB and starts from the loop header and ends at + /// the earliest loop latch. \Returns the path cost and the earliest latch + /// MBB. + std::pair + getLoopDistanceAndExitingLatch(MachineBasicBlock *CurMBB) const; + /// Returns the shortest distance from ShortestPathTable. 
+ double getShortestDistance(MachineBasicBlock *FromMBB, + MachineBasicBlock *ToMBB) const { + auto It = ShortestPathTable.find({FromMBB, ToMBB}); + if (It == ShortestPathTable.end()) + return std::numeric_limits::max(); + return It->second; + } +}; + +class AMDGPUNextUseAnalysisPass : public MachineFunctionPass { + const MachineLoopInfo *MLI = nullptr; + +public: + static char ID; + + AMDGPUNextUseAnalysisPass() : MachineFunctionPass(ID) {} + + bool runOnMachineFunction(MachineFunction &) override; + + StringRef getPassName() const override { return "Next Use Analysis"; } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); + AU.addPreserved(); + AU.addPreserved(); + MachineFunctionPass::getAnalysisUsage(AU); + } +}; + +#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUNEXTUSEANALYSIS_H diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 8a831f7915882..509124c3fe20f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -538,6 +538,10 @@ static cl::opt EnableUniformIntrinsicCombine( cl::desc("Enable/Disable the Uniform Intrinsic Combine Pass"), cl::init(true), cl::Hidden); +static cl::opt OptNextUseAnalysis("enable-next-use-analysis", + cl::desc("Enable next-use analysis"), + cl::init(true), cl::Hidden); + extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { // Register the target RegisterTargetMachine X(getTheR600Target()); @@ -570,6 +574,7 @@ extern "C" LLVM_ABI LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() { initializeSIShrinkInstructionsLegacyPass(*PR); initializeSIOptimizeExecMaskingPreRALegacyPass(*PR); initializeSIOptimizeVGPRLiveRangeLegacyPass(*PR); + initializeAMDGPUNextUseAnalysisPassPass(*PR); initializeSILoadStoreOptimizerLegacyPass(*PR); initializeAMDGPUCtorDtorLoweringLegacyPass(*PR); initializeAMDGPUAlwaysInlinePass(*PR); @@ -1602,6 +1607,9 @@ void 
GCNPassConfig::addOptimizedRegAlloc() { if (OptVGPRLiveRange) insertPass(&LiveVariablesID, &SIOptimizeVGPRLiveRangeLegacyID); + if (OptNextUseAnalysis) + insertPass(&LiveVariablesID, &AMDGPUNextUseAnalysisID); + // This must be run immediately after phi elimination and before // TwoAddressInstructions, otherwise the processing of the tied operand of // SI_ELSE will introduce a copy of the tied operand source after the else. diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt index 782cbfa76e6e9..a207a26753c05 100644 --- a/llvm/lib/Target/AMDGPU/CMakeLists.txt +++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt @@ -90,6 +90,7 @@ add_llvm_target(AMDGPUCodeGen AMDGPUMCResourceInfo.cpp AMDGPUMarkLastScratchLoad.cpp AMDGPUMIRFormatter.cpp + AMDGPUNextUseAnalysis.cpp AMDGPUPerfHintAnalysis.cpp AMDGPUPostLegalizerCombiner.cpp AMDGPUPreLegalizerCombiner.cpp diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll index 8364e680bc8c7..9fcdfcdcafd07 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -356,9 +356,11 @@ ; GCN-O1-NEXT: Live Variable Analysis ; GCN-O1-NEXT: MachineDominator Tree Construction ; GCN-O1-NEXT: SI Optimize VGPR LiveRange +; GCN-O1-NEXT: Next Use Analysis ; GCN-O1-NEXT: Eliminate PHI nodes for register allocation ; GCN-O1-NEXT: SI Lower control flow pseudo instructions ; GCN-O1-NEXT: Two-Address instruction pass +; GCN-O1-NEXT: MachineDominator Tree Construction ; GCN-O1-NEXT: Slot index numbering ; GCN-O1-NEXT: Live Interval Analysis ; GCN-O1-NEXT: Machine Natural Loop Construction @@ -677,9 +679,11 @@ ; GCN-O1-OPTS-NEXT: Remove unreachable machine basic blocks ; GCN-O1-OPTS-NEXT: Live Variable Analysis ; GCN-O1-OPTS-NEXT: SI Optimize VGPR LiveRange +; GCN-O1-OPTS-NEXT: Next Use Analysis ; GCN-O1-OPTS-NEXT: Eliminate PHI nodes for register allocation ; GCN-O1-OPTS-NEXT: SI Lower control flow pseudo instructions ; 
GCN-O1-OPTS-NEXT: Two-Address instruction pass +; GCN-O1-OPTS-NEXT: MachineDominator Tree Construction ; GCN-O1-OPTS-NEXT: Slot index numbering ; GCN-O1-OPTS-NEXT: Live Interval Analysis ; GCN-O1-OPTS-NEXT: Machine Natural Loop Construction @@ -1003,9 +1007,11 @@ ; GCN-O2-NEXT: Remove unreachable machine basic blocks ; GCN-O2-NEXT: Live Variable Analysis ; GCN-O2-NEXT: SI Optimize VGPR LiveRange +; GCN-O2-NEXT: Next Use Analysis ; GCN-O2-NEXT: Eliminate PHI nodes for register allocation ; GCN-O2-NEXT: SI Lower control flow pseudo instructions ; GCN-O2-NEXT: Two-Address instruction pass +; GCN-O2-NEXT: MachineDominator Tree Construction ; GCN-O2-NEXT: Slot index numbering ; GCN-O2-NEXT: Live Interval Analysis ; GCN-O2-NEXT: Machine Natural Loop Construction @@ -1343,9 +1349,11 @@ ; GCN-O3-NEXT: Remove unreachable machine basic blocks ; GCN-O3-NEXT: Live Variable Analysis ; GCN-O3-NEXT: SI Optimize VGPR LiveRange +; GCN-O3-NEXT: Next Use Analysis ; GCN-O3-NEXT: Eliminate PHI nodes for register allocation ; GCN-O3-NEXT: SI Lower control flow pseudo instructions ; GCN-O3-NEXT: Two-Address instruction pass +; GCN-O3-NEXT: MachineDominator Tree Construction ; GCN-O3-NEXT: Slot index numbering ; GCN-O3-NEXT: Live Interval Analysis ; GCN-O3-NEXT: Machine Natural Loop Construction diff --git a/llvm/test/CodeGen/AMDGPU/test_ers_basic_case.ll b/llvm/test/CodeGen/AMDGPU/test_ers_basic_case.ll new file mode 100644 index 0000000000000..39020ec67835a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/test_ers_basic_case.ll @@ -0,0 +1,167 @@ +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -enable-next-use-analysis=true -verify-machineinstrs -dump-next-use-distance < %s 2>&1 | FileCheck %s + +; +; bb.0.entry +; / | +; bb.3.bb2 | +; \ | +; bb.1.Flow +; / | +; bb.2.bb1 | +; \ | +; bb.4.exit +define amdgpu_ps i64 @test(ptr addrspace(3) %p1, ptr addrspace(3) %p2, i1 %cond1, i64 %val) { +; CHECK-LABEL: # Machine code for function test: IsSSA, TracksLiveness +; CHECK-NEXT: Function Live Ins: $vgpr0 
in [[Reg1:%[0-9]+]], $vgpr1 in [[Reg2:%[0-9]+]], $vgpr2 in [[Reg3:%[0-9]+]], $vgpr3 in [[Reg4:%[0-9]+]], $vgpr4 in [[Reg5:%[0-9]+]] +; EMPTY: +; CHECK: bb.0.entry: +; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000); %bb.3(50.00%), %bb.1(50.00%) +; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 +; CHECK-NEXT: [[Reg5]]:vgpr_32 = COPY killed $vgpr4 +; CHECK-NEXT: [[Reg4]]:vgpr_32 = COPY killed $vgpr3 +; CHECK-NEXT: [[Reg3]]:vgpr_32 = COPY killed $vgpr2 +; CHECK-NEXT: [[Reg2]]:vgpr_32 = COPY killed $vgpr1 +; CHECK-NEXT: [[Reg1]]:vgpr_32 = COPY killed $vgpr0 +; CHECK-NEXT: [[Reg6:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 1, killed [[Reg3]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg7:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 1, killed [[Reg6]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg8:%[0-9]+]]:sreg_32 = SI_IF killed [[Reg7]]:sreg_32, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.3 +; EMPTY: +; CHECK: bb.1.Flow: +; CHECK-NEXT: ; predecessors: %bb.0, %bb.3 +; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000); %bb.2(50.00%), %bb.4(50.00%) +; EMPTY: +; CHECK: [[Reg9:%[0-9]+]]:vreg_64 = PHI undef [[Reg10:%[0-9]+]]:vreg_64, %bb.0, [[Reg11:%[0-9]+]]:vreg_64, %bb.3 +; CHECK-NEXT: [[Reg12:%[0-9]+]]:vgpr_32 = PHI [[Reg1]]:vgpr_32, %bb.0, undef [[Reg13:%[0-9]+]]:vgpr_32, %bb.3 +; CHECK-NEXT: [[Reg14:%[0-9]+]]:sreg_32 = SI_ELSE killed [[Reg8]]:sreg_32, %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.2 +; EMPTY: +; CHECK: bb.2.bb1: +; CHECK-NEXT: ; predecessors: %bb.1 +; CHECK-NEXT: successors: %bb.4(0x80000000); %bb.4(100.00%) +; EMPTY: +; CHECK: [[Reg15:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[Reg12]]:vgpr_32, 0, 0, implicit $exec :: (load (s8) from %ir.p1, addrspace 3) +; CHECK-NEXT: [[Reg16:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[Reg12]]:vgpr_32, 1, 0, implicit $exec :: (load (s8) from %ir.p1 + 1, addrspace 3) +; CHECK-NEXT: [[Reg17:%[0-9]+]]:vgpr_32 = 
DS_READ_U8_gfx9 [[Reg12]]:vgpr_32, 2, 0, implicit $exec :: (load (s8) from %ir.p1 + 2, addrspace 3) +; CHECK-NEXT: [[Reg18:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[Reg12]]:vgpr_32, 3, 0, implicit $exec :: (load (s8) from %ir.p1 + 3, addrspace 3) +; CHECK-NEXT: [[Reg19:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[Reg12]]:vgpr_32, 4, 0, implicit $exec :: (load (s8) from %ir.p1 + 4, addrspace 3) +; CHECK-NEXT: [[Reg20:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[Reg12]]:vgpr_32, 5, 0, implicit $exec :: (load (s8) from %ir.p1 + 5, addrspace 3) +; CHECK-NEXT: [[Reg21:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[Reg12]]:vgpr_32, 6, 0, implicit $exec :: (load (s8) from %ir.p1 + 6, addrspace 3) +; CHECK-NEXT: [[Reg22:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 killed [[Reg12]]:vgpr_32, 7, 0, implicit $exec :: (load (s8) from %ir.p1 + 7, addrspace 3) +; CHECK-NEXT: [[Reg23:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg20]]:vgpr_32, 8, killed [[Reg19]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg24:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg22]]:vgpr_32, 8, killed [[Reg21]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg25:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg24]]:vgpr_32, 16, killed [[Reg23]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg26:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg16]]:vgpr_32, 8, killed [[Reg15]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg27:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg18]]:vgpr_32, 8, killed [[Reg17]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg28:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg27]]:vgpr_32, 16, killed [[Reg26]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg29:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg28]]:vgpr_32, %subreg.sub0, killed [[Reg25]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg30:%[0-9]+]]:vreg_64 = COPY killed [[Reg29]]:vreg_64 +; CHECK-NEXT: S_BRANCH %bb.4 +; EMPTY: +; CHECK: bb.3.bb2: +; CHECK-NEXT: ; predecessors: %bb.0 +; CHECK-NEXT: successors: %bb.1(0x80000000); %bb.1(100.00%) +; EMPTY: +; CHECK: 
[[Reg31:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[Reg2]]:vgpr_32, 0, 0, implicit $exec :: (load (s8) from %ir.p2, addrspace 3) +; CHECK-NEXT: [[Reg32:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[Reg2]]:vgpr_32, 1, 0, implicit $exec :: (load (s8) from %ir.p2 + 1, addrspace 3) +; CHECK-NEXT: [[Reg33:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[Reg2]]:vgpr_32, 2, 0, implicit $exec :: (load (s8) from %ir.p2 + 2, addrspace 3) +; CHECK-NEXT: [[Reg34:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[Reg2]]:vgpr_32, 3, 0, implicit $exec :: (load (s8) from %ir.p2 + 3, addrspace 3) +; CHECK-NEXT: [[Reg35:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[Reg2]]:vgpr_32, 4, 0, implicit $exec :: (load (s8) from %ir.p2 + 4, addrspace 3) +; CHECK-NEXT: [[Reg36:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[Reg2]]:vgpr_32, 5, 0, implicit $exec :: (load (s8) from %ir.p2 + 5, addrspace 3) +; CHECK-NEXT: [[Reg37:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[Reg2]]:vgpr_32, 6, 0, implicit $exec :: (load (s8) from %ir.p2 + 6, addrspace 3) +; CHECK-NEXT: [[Reg38:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 killed [[Reg2]]:vgpr_32, 7, 0, implicit $exec :: (load (s8) from %ir.p2 + 7, addrspace 3) +; CHECK-NEXT: [[Reg39:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg36]]:vgpr_32, 8, killed [[Reg35]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg40:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg38]]:vgpr_32, 8, killed [[Reg37]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg41:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg40]]:vgpr_32, 16, killed [[Reg39]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg42:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg32]]:vgpr_32, 8, killed [[Reg31]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg43:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg34]]:vgpr_32, 8, killed [[Reg33]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg44:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg43]]:vgpr_32, 16, killed [[Reg42]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg45:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg44]]:vgpr_32, 
%subreg.sub0, killed [[Reg41]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg11]]:vreg_64 = COPY killed [[Reg45]]:vreg_64 +; CHECK-NEXT: S_BRANCH %bb.1 +; EMPTY: +; CHECK: bb.4.exit: +; CHECK-NEXT: ; predecessors: %bb.1, %bb.2 +; EMPTY: +; CHECK: [[Reg46:%[0-9]+]]:vreg_64 = PHI [[Reg9]]:vreg_64, %bb.1, [[Reg30]]:vreg_64, %bb.2 +; CHECK-NEXT: SI_END_CF killed [[Reg14]]:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: [[Reg47:%[0-9]+]]:vgpr_32, [[Reg48:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[Reg46]].sub0:vreg_64, killed [[Reg4]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg49:%[0-9]+]]:vgpr_32, dead $sgpr_null = V_ADDC_U32_e64 killed [[Reg46]].sub1:vreg_64, killed [[Reg5]]:vgpr_32, killed [[Reg48]]:sreg_32_xm0_xexec, 0, implicit $exec +; CHECK-NEXT: [[Reg50:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[Reg47]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg51:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[Reg49]]:vgpr_32, implicit $exec +; CHECK-NEXT: $sgpr0 = COPY killed [[Reg50]]:sreg_32_xm0 +; CHECK-NEXT: $sgpr1 = COPY killed [[Reg51]]:sreg_32_xm0 +; CHECK-NEXT: SI_RETURN_TO_EPILOG killed $sgpr0, killed $sgpr1 +; EMPTY: +; CHECK: # End machine code for function test. 
+; EMPTY: +; CHECK: Next-use distance of Register [[Reg5]] = 16.0 +; CHECK-NEXT: Next-use distance of Register [[Reg4]] = 14.0 +; CHECK-NEXT: Next-use distance of Register [[Reg3]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg2]] = 6.0 +; CHECK-NEXT: Next-use distance of Register [[Reg1]] = 6.0 +; CHECK-NEXT: Next-use distance of Register [[Reg6]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg7]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg8]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg9]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg12]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg14]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg15]] = 11.0 +; CHECK-NEXT: Next-use distance of Register [[Reg16]] = 10.0 +; CHECK-NEXT: Next-use distance of Register [[Reg17]] = 10.0 +; CHECK-NEXT: Next-use distance of Register [[Reg18]] = 9.0 +; CHECK-NEXT: Next-use distance of Register [[Reg19]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg20]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg21]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg22]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg23]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg24]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg25]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg26]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg27]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg28]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg29]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg30]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg31]] = 11.0 +; CHECK-NEXT: Next-use distance of Register [[Reg32]] = 10.0 +; CHECK-NEXT: Next-use distance of Register [[Reg33]] = 10.0 +; CHECK-NEXT: Next-use distance of Register [[Reg34]] = 9.0 +; CHECK-NEXT: Next-use distance of Register [[Reg35]] = 4.0 +; CHECK-NEXT: Next-use distance of 
Register [[Reg36]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg37]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg38]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg39]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg40]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg41]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg42]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg43]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg44]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg45]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg11]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg46]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg47]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg48]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg49]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg50]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg51]] = 2.0 +entry: +; entry +; / \ +; bb1 bb2 +; \ / +; exit + br i1 %cond1, label %bb1, label %bb2 + +bb1: + %ld1 = load i64, ptr addrspace(3) %p1, align 1 + br label %exit + +bb2: + %ld2 = load i64, ptr addrspace(3) %p2, align 1 + br label %exit + +exit: + %phi = phi i64 [ %ld1, %bb1 ], [ %ld2, %bb2 ] + %add = add i64 %phi, %val + ret i64 %add +} diff --git a/llvm/test/CodeGen/AMDGPU/test_ers_do_not_spill_restore_inside_loop.ll b/llvm/test/CodeGen/AMDGPU/test_ers_do_not_spill_restore_inside_loop.ll new file mode 100644 index 0000000000000..274952dda5704 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/test_ers_do_not_spill_restore_inside_loop.ll @@ -0,0 +1,328 @@ +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -enable-next-use-analysis=true -verify-machineinstrs -dump-next-use-distance < %s 2>&1 | FileCheck %s + +; +; bb.0.entry +; | +; bb.1.loop1.header<---+ +; / | | +;bb.4.loop1.latch2 | | +; \ | | +; bb.2.Flow | +; / | | +;bb.3.loop1.latch1 | | +; \ | | +; bb.5.Flow2-------+ +; | +; bb.6.bb1 +; | +; 
bb.7.loop2<------+ +; | | +; +------------+ +; | +; bb.8.exit +; +define amdgpu_ps void @test(ptr addrspace(1) %p1, ptr addrspace(1) %p2, ptr addrspace(1) %p3, ptr addrspace(1) %p4, ptr addrspace(1) %p5, i1 %cond, i32 %TC1, i32 %TC2, i32 %TC3) { +; CHECK-LABEL: # Machine code for function test: IsSSA, TracksLiveness +; CHECK-NEXT: Function Live Ins: $vgpr0 in [[Reg1:%[0-9]+]], $vgpr1 in [[Reg2:%[0-9]+]], $vgpr2 in [[Reg3:%[0-9]+]], $vgpr3 in [[Reg4:%[0-9]+]], $vgpr4 in [[Reg5:%[0-9]+]], $vgpr5 in [[Reg6:%[0-9]+]], $vgpr6 in [[Reg7:%[0-9]+]], $vgpr7 in [[Reg8:%[0-9]+]], $vgpr8 in [[Reg9:%[0-9]+]], $vgpr9 in [[Reg10:%[0-9]+]], $vgpr10 in [[Reg11:%[0-9]+]], $vgpr11 in [[Reg12:%[0-9]+]], $vgpr12 in [[Reg13:%[0-9]+]], $vgpr13 in [[Reg14:%[0-9]+]] +; EMPTY: +; CHECK: bb.0.entry: +; CHECK-NEXT: successors: %bb.1(0x80000000); %bb.1(100.00%) +; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13 +; CHECK-NEXT: [[Reg14]]:vgpr_32 = COPY killed $vgpr13 +; CHECK-NEXT: [[Reg13]]:vgpr_32 = COPY killed $vgpr12 +; CHECK-NEXT: [[Reg12]]:vgpr_32 = COPY killed $vgpr11 +; CHECK-NEXT: [[Reg11]]:vgpr_32 = COPY killed $vgpr10 +; CHECK-NEXT: [[Reg10]]:vgpr_32 = COPY killed $vgpr9 +; CHECK-NEXT: [[Reg9]]:vgpr_32 = COPY killed $vgpr8 +; CHECK-NEXT: [[Reg8]]:vgpr_32 = COPY killed $vgpr7 +; CHECK-NEXT: [[Reg7]]:vgpr_32 = COPY killed $vgpr6 +; CHECK-NEXT: [[Reg6]]:vgpr_32 = COPY killed $vgpr5 +; CHECK-NEXT: [[Reg5]]:vgpr_32 = COPY killed $vgpr4 +; CHECK-NEXT: [[Reg4]]:vgpr_32 = COPY killed $vgpr3 +; CHECK-NEXT: [[Reg3]]:vgpr_32 = COPY killed $vgpr2 +; CHECK-NEXT: [[Reg2]]:vgpr_32 = COPY killed $vgpr1 +; CHECK-NEXT: [[Reg1]]:vgpr_32 = COPY killed $vgpr0 +; CHECK-NEXT: [[Reg15:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg9]]:vgpr_32, %subreg.sub0, killed [[Reg10]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg16:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg7]]:vgpr_32, %subreg.sub0, killed [[Reg8]]:vgpr_32, 
%subreg.sub1 +; CHECK-NEXT: [[Reg17:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg5]]:vgpr_32, %subreg.sub0, killed [[Reg6]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg18:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg3]]:vgpr_32, %subreg.sub0, killed [[Reg4]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg19:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg1]]:vgpr_32, %subreg.sub0, killed [[Reg2]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg20:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 1, killed [[Reg11]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg21:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 1, killed [[Reg20]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg22:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg19]]:vreg_64, 0, 0, implicit $exec :: (load (s8) from %ir.p1, addrspace 1) +; CHECK-NEXT: [[Reg23:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg19]]:vreg_64, 1, 0, implicit $exec :: (load (s8) from %ir.p1 + 1, addrspace 1) +; CHECK-NEXT: [[Reg24:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg23]]:vgpr_32, 8, killed [[Reg22]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg25:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg19]]:vreg_64, 2, 0, implicit $exec :: (load (s8) from %ir.p1 + 2, addrspace 1) +; CHECK-NEXT: [[Reg26:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE killed [[Reg19]]:vreg_64, 3, 0, implicit $exec :: (load (s8) from %ir.p1 + 3, addrspace 1) +; CHECK-NEXT: [[Reg27:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg26]]:vgpr_32, 8, killed [[Reg25]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg28:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg27]]:vgpr_32, 16, killed [[Reg24]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg29:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 10, implicit $exec +; CHECK-NEXT: [[Reg30:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; CHECK-NEXT: [[Reg31:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec +; EMPTY: +; CHECK: bb.1.loop1.header: +; CHECK-NEXT: ; predecessors: %bb.0, %bb.5 +; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000); %bb.4(50.00%), %bb.2(50.00%) +; EMPTY: +; CHECK: 
[[Reg32:%[0-9]+]]:sreg_32 = PHI [[Reg30]]:sreg_32, %bb.0, [[Reg33:%[0-9]+]]:sreg_32, %bb.5 +; CHECK-NEXT: [[Reg34:%[0-9]+]]:vgpr_32 = PHI [[Reg31]]:vgpr_32, %bb.0, [[Reg35:%[0-9]+]]:vgpr_32, %bb.5 +; CHECK-NEXT: [[Reg36:%[0-9]+]]:vgpr_32 = PHI [[Reg29]]:vgpr_32, %bb.0, [[Reg37:%[0-9]+]]:vgpr_32, %bb.5 +; CHECK-NEXT: [[Reg38:%[0-9]+]]:vgpr_32 = PHI [[Reg28]]:vgpr_32, %bb.0, [[Reg39:%[0-9]+]]:vgpr_32, %bb.5 +; CHECK-NEXT: [[Reg40:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 +; CHECK-NEXT: [[Reg41:%[0-9]+]]:sreg_32 = SI_IF [[Reg21]]:sreg_32, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.4 +; EMPTY: +; CHECK: bb.2.Flow: +; CHECK-NEXT: ; predecessors: %bb.1, %bb.4 +; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000); %bb.3(50.00%), %bb.5(50.00%) +; EMPTY: +; CHECK: [[Reg42:%[0-9]+]]:sreg_32 = PHI [[Reg40]]:sreg_32, %bb.1, [[Reg43:%[0-9]+]]:sreg_32, %bb.4 +; CHECK-NEXT: [[Reg44:%[0-9]+]]:vgpr_32 = PHI undef [[Reg45:%[0-9]+]]:vgpr_32, %bb.1, [[Reg46:%[0-9]+]]:vgpr_32, %bb.4 +; CHECK-NEXT: [[Reg47:%[0-9]+]]:vgpr_32 = PHI undef [[Reg45]]:vgpr_32, %bb.1, [[Reg48:%[0-9]+]]:vgpr_32, %bb.4 +; CHECK-NEXT: [[Reg49:%[0-9]+]]:vgpr_32 = PHI undef [[Reg45]]:vgpr_32, %bb.1, [[Reg50:%[0-9]+]]:vgpr_32, %bb.4 +; CHECK-NEXT: [[Reg51:%[0-9]+]]:vgpr_32 = PHI [[Reg36]]:vgpr_32, %bb.1, undef [[Reg52:%[0-9]+]]:vgpr_32, %bb.4 +; CHECK-NEXT: [[Reg53:%[0-9]+]]:vgpr_32 = PHI [[Reg34]]:vgpr_32, %bb.1, undef [[Reg54:%[0-9]+]]:vgpr_32, %bb.4 +; CHECK-NEXT: [[Reg55:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec +; CHECK-NEXT: [[Reg56:%[0-9]+]]:sreg_32 = SI_ELSE killed [[Reg41]]:sreg_32, %bb.5, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.3 +; EMPTY: +; CHECK: bb.3.loop1.latch1: +; CHECK-NEXT: ; predecessors: %bb.2 +; CHECK-NEXT: successors: %bb.5(0x80000000); %bb.5(100.00%) +; EMPTY: +; CHECK: [[Reg57:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[Reg28]]:vgpr_32, killed [[Reg51]]:vgpr_32, 0, implicit $exec +; 
CHECK-NEXT: [[Reg58:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 1, killed [[Reg53]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg59:%[0-9]+]]:sreg_32 = V_CMP_GE_U32_e64 [[Reg58]]:vgpr_32, [[Reg12]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg60:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec +; CHECK-NEXT: [[Reg61:%[0-9]+]]:sreg_32 = S_ANDN2_B32 killed [[Reg42]]:sreg_32, $exec_lo, implicit-def dead $scc +; CHECK-NEXT: [[Reg62:%[0-9]+]]:sreg_32 = S_AND_B32 killed [[Reg59]]:sreg_32, $exec_lo, implicit-def dead $scc +; CHECK-NEXT: [[Reg63:%[0-9]+]]:sreg_32 = S_OR_B32 killed [[Reg61]]:sreg_32, killed [[Reg62]]:sreg_32, implicit-def dead $scc +; CHECK-NEXT: S_BRANCH %bb.5 +; EMPTY: +; CHECK: bb.4.loop1.latch2: +; CHECK-NEXT: ; predecessors: %bb.1 +; CHECK-NEXT: successors: %bb.2(0x80000000); %bb.2(100.00%) +; EMPTY: +; CHECK: [[Reg48]]:vgpr_32 = GLOBAL_LOAD_DWORD [[Reg18]]:vreg_64, 0, 0, implicit $exec :: (load (s32) from %ir.p2, addrspace 1) +; CHECK-NEXT: [[Reg46]]:vgpr_32 = V_ADD_U32_e64 1, killed [[Reg36]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg50]]:vgpr_32 = V_ADD_U32_e64 [[Reg48]]:vgpr_32, [[Reg46]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg64:%[0-9]+]]:sreg_32 = V_CMP_GE_U32_e64 [[Reg46]]:vgpr_32, [[Reg13]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg43]]:sreg_32 = S_ORN2_B32 killed [[Reg64]]:sreg_32, $exec_lo, implicit-def dead $scc +; CHECK-NEXT: S_BRANCH %bb.2 +; EMPTY: +; CHECK: bb.5.Flow2: +; CHECK-NEXT: ; predecessors: %bb.2, %bb.3 +; CHECK-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000); %bb.6(3.12%), %bb.1(96.88%) +; EMPTY: +; CHECK: [[Reg65:%[0-9]+]]:sreg_32 = PHI [[Reg42]]:sreg_32, %bb.2, [[Reg63]]:sreg_32, %bb.3 +; CHECK-NEXT: [[Reg37]]:vgpr_32 = PHI [[Reg44]]:vgpr_32, %bb.2, [[Reg60]]:vgpr_32, %bb.3 +; CHECK-NEXT: [[Reg35]]:vgpr_32 = PHI [[Reg55]]:vgpr_32, %bb.2, [[Reg58]]:vgpr_32, %bb.3 +; CHECK-NEXT: [[Reg66:%[0-9]+]]:vgpr_32 = PHI [[Reg47]]:vgpr_32, %bb.2, [[Reg28]]:vgpr_32, %bb.3 +; CHECK-NEXT: [[Reg39]]:vgpr_32 = PHI 
[[Reg49]]:vgpr_32, %bb.2, [[Reg57]]:vgpr_32, %bb.3 +; CHECK-NEXT: SI_END_CF killed [[Reg56]]:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: [[Reg33]]:sreg_32 = SI_IF_BREAK killed [[Reg65]]:sreg_32, killed [[Reg32]]:sreg_32, implicit-def dead $scc +; CHECK-NEXT: SI_LOOP [[Reg33]]:sreg_32, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.6 +; EMPTY: +; CHECK: bb.6.bb1: +; CHECK-NEXT: ; predecessors: %bb.5 +; CHECK-NEXT: successors: %bb.7(0x80000000); %bb.7(100.00%) +; EMPTY: +; CHECK: SI_END_CF killed [[Reg33]]:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: [[Reg67:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[Reg39]]:vgpr_32, [[Reg28]]:vgpr_32, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_DWORD killed [[Reg17]]:vreg_64, [[Reg67]]:vgpr_32, 0, 0, implicit $exec :: (store (s32) into %ir.p3, addrspace 1) +; CHECK-NEXT: [[Reg68:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; EMPTY: +; CHECK: bb.7.loop2: +; CHECK-NEXT: ; predecessors: %bb.6, %bb.7 +; CHECK-NEXT: successors: %bb.8(0x04000000), %bb.7(0x7c000000); %bb.8(3.12%), %bb.7(96.88%) +; EMPTY: +; CHECK: [[Reg69:%[0-9]+]]:sreg_32 = PHI [[Reg68]]:sreg_32, %bb.6, [[Reg70:%[0-9]+]]:sreg_32, %bb.7 +; CHECK-NEXT: [[Reg71:%[0-9]+]]:sreg_32 = PHI [[Reg68]]:sreg_32, %bb.6, [[Reg72:%[0-9]+]]:sreg_32, %bb.7 +; CHECK-NEXT: [[Reg73:%[0-9]+]]:vgpr_32 = PHI [[Reg39]]:vgpr_32, %bb.6, [[Reg28]]:vgpr_32, %bb.7 +; CHECK-NEXT: [[Reg72]]:sreg_32 = S_ADD_I32 killed [[Reg71]]:sreg_32, 2, implicit-def dead $scc +; CHECK-NEXT: [[Reg74:%[0-9]+]]:sreg_32 = V_CMP_GE_U32_e64 [[Reg72]]:sreg_32, [[Reg14]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg70]]:sreg_32 = SI_IF_BREAK killed [[Reg74]]:sreg_32, killed [[Reg69]]:sreg_32, implicit-def dead $scc +; CHECK-NEXT: [[Reg75:%[0-9]+]]:vgpr_32 = COPY [[Reg72]]:sreg_32, implicit $exec +; CHECK-NEXT: SI_LOOP [[Reg70]]:sreg_32, %bb.7, implicit-def dead $exec, implicit-def dead $scc, implicit $exec 
+; CHECK-NEXT: S_BRANCH %bb.8 +; EMPTY: +; CHECK: bb.8.exit: +; CHECK-NEXT: ; predecessors: %bb.7 +; EMPTY: +; CHECK: SI_END_CF killed [[Reg70]]:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: [[Reg76:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[Reg73]]:vgpr_32, killed [[Reg75]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg77:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[Reg76]]:vgpr_32, killed [[Reg39]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg78:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 [[Reg77]]:vgpr_32, [[Reg67]]:vgpr_32, killed [[Reg76]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg79:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 killed [[Reg28]]:vgpr_32, killed [[Reg78]]:vgpr_32, 100, implicit $exec +; CHECK-NEXT: [[Reg80:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg79]]:vgpr_32, killed [[Reg73]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg81:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[Reg77]]:vgpr_32, killed [[Reg66]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg82:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg80]]:vgpr_32, killed [[Reg81]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg83:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[Reg16]]:vreg_64, 0, 0, implicit $exec :: (load (s32) from %ir.p4, addrspace 1) +; CHECK-NEXT: [[Reg84:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg82]]:vgpr_32, [[Reg83]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg85:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 killed [[Reg77]]:vgpr_32, killed [[Reg83]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg86:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[Reg15]]:vreg_64, 0, 0, implicit $exec :: (load (s32) from %ir.p5, addrspace 1) +; CHECK-NEXT: [[Reg87:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 killed [[Reg85]]:vgpr_32, killed [[Reg86]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg88:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 killed [[Reg84]]:vgpr_32, killed [[Reg87]]:vgpr_32, killed [[Reg38]]:vgpr_32, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_DWORD killed [[Reg16]]:vreg_64, killed [[Reg88]]:vgpr_32, 0, 0, 
implicit $exec :: (store (s32) into %ir.p4, addrspace 1) +; CHECK-NEXT: GLOBAL_STORE_DWORD killed [[Reg15]]:vreg_64, killed [[Reg67]]:vgpr_32, 0, 0, implicit $exec :: (store (s32) into %ir.p5, addrspace 1) +; CHECK-NEXT: S_ENDPGM 0 +; EMPTY: +; CHECK: # End machine code for function test. +; EMPTY: +; CHECK: Next-use distance of Register [[Reg14]] = 25039.0 +; CHECK-NEXT: Next-use distance of Register [[Reg13]] = 40.0 +; CHECK-NEXT: Next-use distance of Register [[Reg12]] = 47.0 +; CHECK-NEXT: Next-use distance of Register [[Reg11]] = 16.0 +; CHECK-NEXT: Next-use distance of Register [[Reg10]] = 10.0 +; CHECK-NEXT: Next-use distance of Register [[Reg9]] = 9.0 +; CHECK-NEXT: Next-use distance of Register [[Reg8]] = 9.0 +; CHECK-NEXT: Next-use distance of Register [[Reg7]] = 8.0 +; CHECK-NEXT: Next-use distance of Register [[Reg6]] = 8.0 +; CHECK-NEXT: Next-use distance of Register [[Reg5]] = 7.0 +; CHECK-NEXT: Next-use distance of Register [[Reg4]] = 7.0 +; CHECK-NEXT: Next-use distance of Register [[Reg3]] = 6.0 +; CHECK-NEXT: Next-use distance of Register [[Reg2]] = 6.0 +; CHECK-NEXT: Next-use distance of Register [[Reg1]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg15]] = 34032.0 +; CHECK-NEXT: Next-use distance of Register [[Reg16]] = 34028.0 +; CHECK-NEXT: Next-use distance of Register [[Reg17]] = 25017.0 +; CHECK-NEXT: Next-use distance of Register [[Reg18]] = 21.0 +; CHECK-NEXT: Next-use distance of Register [[Reg19]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg20]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg21]] = 16.0 +; CHECK-NEXT: Next-use distance of Register [[Reg22]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg23]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg24]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg25]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg26]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg27]] = 1.0 +; CHECK-NEXT: Next-use distance of Register 
[[Reg28]] = 7.0 +; CHECK-NEXT: Next-use distance of Register [[Reg29]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg30]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg31]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg32]] = 22.0 +; CHECK-NEXT: Next-use distance of Register [[Reg34]] = 11.0 +; CHECK-NEXT: Next-use distance of Register [[Reg36]] = 6.0 +; CHECK-NEXT: Next-use distance of Register [[Reg38]] = 34018.0 +; CHECK-NEXT: Next-use distance of Register [[Reg40]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg41]] = 9.0 +; CHECK-NEXT: Next-use distance of Register [[Reg42]] = 9.0 +; CHECK-NEXT: Next-use distance of Register [[Reg44]] = 9.0 +; CHECK-NEXT: Next-use distance of Register [[Reg47]] = 10.0 +; CHECK-NEXT: Next-use distance of Register [[Reg49]] = 10.0 +; CHECK-NEXT: Next-use distance of Register [[Reg51]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg53]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg55]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg56]] = 7.0 +; CHECK-NEXT: Next-use distance of Register [[Reg57]] = 12.0 +; CHECK-NEXT: Next-use distance of Register [[Reg58]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg59]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg60]] = 6.0 +; CHECK-NEXT: Next-use distance of Register [[Reg61]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg62]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg63]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg48]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg46]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg50]] = 7.0 +; CHECK-NEXT: Next-use distance of Register [[Reg64]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg43]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg65]] = 6.0 +; CHECK-NEXT: Next-use distance of Register [[Reg37]] = 10.0 +; CHECK-NEXT: Next-use distance of Register [[Reg35]] = 8.0 +; CHECK-NEXT: 
Next-use distance of Register [[Reg66]] = 34011.0 +; CHECK-NEXT: Next-use distance of Register [[Reg39]] = 8.0 +; CHECK-NEXT: Next-use distance of Register [[Reg33]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg67]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg68]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg69]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg71]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg73]] = 9002.0 +; CHECK-NEXT: Next-use distance of Register [[Reg72]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg74]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg70]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg75]] = 9002.0 +; CHECK-NEXT: Next-use distance of Register [[Reg76]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg77]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg78]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg79]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg80]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg81]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg82]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg83]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg84]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg85]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg86]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg87]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg88]] = 1.0 +entry: +; entry +; | +; +-------->loop1.header<--------+ +; | / \ | +; +--loop1.latch1 loop1.latch2--+ +; \ / +; bb1 +; | +; +<-----+ +; loop2 | +; +------+ +; | +; exit + %ld1 = load i32, ptr addrspace(1) %p1, align 1 + %add1 = add i32 %ld1, 100 + br label %loop1.header + +loop1.header: + %phi.inc1 = phi i32 [ 0, %entry ], [ %inc1, %loop1.latch1 ], [ 0, %loop1.latch2 ] + %phi.inc2 = phi i32 [ 10, %entry ], [ 0, %loop1.latch1 ], [ %inc2, %loop1.latch2 ] + %phi1 = phi i32 [ 
%ld1, %entry ], [ %sub, %loop1.latch1 ], [ %add2, %loop1.latch2 ] + br i1 %cond, label %loop1.latch1, label %loop1.latch2 + +loop1.latch1: + %sub = sub i32 %ld1, %phi.inc2 + %inc1 = add i32 %phi.inc1, 1 + %cond1 = icmp ult i32 %inc1, %TC1 + br i1 %cond1, label %loop1.header, label %bb1 + +loop1.latch2: + %ld2 = load i32, ptr addrspace(1) %p2, align 4 + %inc2 = add i32 %phi.inc2, 1 + %add2 = add i32 %ld2, %inc2 + %cond2 = icmp ult i32 %inc2, %TC2 + br i1 %cond2, label %loop1.header, label %bb1 + +bb1: + %phi2 = phi i32 [ %sub, %loop1.latch1 ], [ %add2, %loop1.latch2 ] + %ld3 = phi i32 [ %ld1, %loop1.latch1 ], [ %ld2, %loop1.latch2 ] + %mul = mul i32 %phi2, %ld1 + store i32 %mul, ptr addrspace(1) %p3 + br label %loop2 + +loop2: + %phi.inc3 = phi i32 [ 0, %bb1 ], [ %inc3, %loop2 ] + %phi3 = phi i32 [ %phi2, %bb1 ], [ %ld1, %loop2 ] + %inc3 = add i32 %phi.inc3, 2 + %add3 = add i32 %phi3, %inc3 + %cond3 = icmp ult i32 %inc3, %TC3 + br i1 %cond3, label %loop2, label %exit + +exit: + %add4 = add i32 %add3, %phi2 + %add5 = add i32 %add4, %mul + %add6 = add i32 %add5, %add3 + %add7 = add i32 %add6, %add1 + %mul2 = mul i32 %add7, %phi3 + %sub1 = sub i32 %add4, %ld3 + %mul3 = mul i32 %mul2, %sub1 + %ld4 = load i32, ptr addrspace(1) %p4, align 4 + %mul4 = mul i32 %mul3, %ld4 + %sub2 = sub i32 %add4, %ld4 + %ld5 = load i32, ptr addrspace(1) %p5, align 4 + %sub3 = sub i32 %sub2, %ld5 + %add8 = add i32 %mul4, %sub3 + %add9 = add i32 %add8, %phi1 + store i32 %add9, ptr addrspace(1) %p4, align 4 + store i32 %mul, ptr addrspace(1) %p5, align 4 + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/test_ers_emit_restore_in_common_dominator.ll b/llvm/test/CodeGen/AMDGPU/test_ers_emit_restore_in_common_dominator.ll new file mode 100644 index 0000000000000..a2c60eb103a69 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/test_ers_emit_restore_in_common_dominator.ll @@ -0,0 +1,379 @@ +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -enable-next-use-analysis=true -verify-machineinstrs 
-dump-next-use-distance < %s 2>&1 | FileCheck %s + +; +; bb.0.entry +; / | +; bb.1.bb1 | +; \ | +; bb.2.bb2 +; / | +; bb.5.bb4 | +; \ | +; bb.3.Flow3 +; / | +; bb.4.bb3 | +; \ | +; bb.6.bb5 +; / | +; bb.12.bb7 | +; \ | +; bb.7.Flow2 +; / | +; bb.8.bb6 | +; / | | +;bb.11.bb9 | | +; \ | | +; bb.9.Flow | +; / | | +;bb.10.bb8 | | +; \ | | +; bb.13.Flow1 | +; \ | +; bb.14.exit +; +define amdgpu_ps i32 @test(ptr addrspace(1) %p1, ptr addrspace(3) %p2, i1 %cond1, i1 %cond2) { +; CHECK-LABEL: # Machine code for function test: IsSSA, TracksLiveness +; CHECK-NEXT: Function Live Ins: $vgpr0 in [[Reg1:%[0-9]+]], $vgpr1 in [[Reg2:%[0-9]+]], $vgpr2 in [[Reg3:%[0-9]+]], $vgpr3 in [[Reg4:%[0-9]+]], $vgpr4 in [[Reg5:%[0-9]+]] +; EMPTY: +; CHECK: bb.0.entry: +; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000); %bb.1(50.00%), %bb.2(50.00%) +; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4 +; CHECK-NEXT: [[Reg5]]:vgpr_32 = COPY killed $vgpr4 +; CHECK-NEXT: [[Reg4]]:vgpr_32 = COPY killed $vgpr3 +; CHECK-NEXT: [[Reg3]]:vgpr_32 = COPY killed $vgpr2 +; CHECK-NEXT: [[Reg2]]:vgpr_32 = COPY killed $vgpr1 +; CHECK-NEXT: [[Reg1]]:vgpr_32 = COPY killed $vgpr0 +; CHECK-NEXT: [[Reg6:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg1]]:vgpr_32, %subreg.sub0, killed [[Reg2]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg7:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 1, killed [[Reg4]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg8:%[0-9]+]]:sreg_32 = V_CMP_EQ_U32_e64 1, killed [[Reg7]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg9:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 1, killed [[Reg5]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg10:%[0-9]+]]:sreg_32 = V_CMP_EQ_U32_e64 1, killed [[Reg9]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg11:%[0-9]+]]:sreg_32 = S_XOR_B32 [[Reg10]]:sreg_32, -1, implicit-def dead $scc +; CHECK-NEXT: [[Reg12:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg6]]:vreg_64, 0, 0, implicit $exec :: (load (s8) from %ir.p1, addrspace 1) +; CHECK-NEXT: [[Reg13:%[0-9]+]]:vgpr_32 = 
GLOBAL_LOAD_UBYTE [[Reg6]]:vreg_64, 1, 0, implicit $exec :: (load (s8) from %ir.p1 + 1, addrspace 1) +; CHECK-NEXT: [[Reg14:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg13]]:vgpr_32, 8, killed [[Reg12]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg15:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg6]]:vreg_64, 2, 0, implicit $exec :: (load (s8) from %ir.p1 + 2, addrspace 1) +; CHECK-NEXT: [[Reg16:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg6]]:vreg_64, 3, 0, implicit $exec :: (load (s8) from %ir.p1 + 3, addrspace 1) +; CHECK-NEXT: [[Reg17:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg16]]:vgpr_32, 8, killed [[Reg15]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg18:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg17]]:vgpr_32, 16, killed [[Reg14]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg19:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg6]]:vreg_64, 12, 0, implicit $exec :: (load (s8) from %ir.gep1, addrspace 1) +; CHECK-NEXT: [[Reg20:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg6]]:vreg_64, 13, 0, implicit $exec :: (load (s8) from %ir.gep1 + 1, addrspace 1) +; CHECK-NEXT: [[Reg21:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg6]]:vreg_64, 14, 0, implicit $exec :: (load (s8) from %ir.gep1 + 2, addrspace 1) +; CHECK-NEXT: [[Reg22:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE killed [[Reg6]]:vreg_64, 15, 0, implicit $exec :: (load (s8) from %ir.gep1 + 3, addrspace 1) +; CHECK-NEXT: [[Reg23:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 100, [[Reg18]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg24:%[0-9]+]]:sreg_32 = SI_IF [[Reg8]]:sreg_32, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.1 +; EMPTY: +; CHECK: bb.1.bb1: +; CHECK-NEXT: ; predecessors: %bb.0 +; CHECK-NEXT: successors: %bb.2(0x80000000); %bb.2(100.00%) +; EMPTY: +; CHECK: [[Reg25:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 100, implicit $exec +; EMPTY: +; CHECK: bb.2.bb2: +; CHECK-NEXT: ; predecessors: %bb.0, %bb.1 +; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.3(0x40000000); 
%bb.5(50.00%), %bb.3(50.00%) +; EMPTY: +; CHECK: [[Reg26:%[0-9]+]]:vgpr_32 = PHI [[Reg23]]:vgpr_32, %bb.0, [[Reg25]]:vgpr_32, %bb.1 +; CHECK-NEXT: SI_END_CF killed [[Reg24]]:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: [[Reg27:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[Reg3]]:vgpr_32, 0, 0, implicit $exec :: (load (s8) from %ir.p2, addrspace 3) +; CHECK-NEXT: [[Reg28:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[Reg3]]:vgpr_32, 1, 0, implicit $exec :: (load (s8) from %ir.p2 + 1, addrspace 3) +; CHECK-NEXT: [[Reg29:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[Reg3]]:vgpr_32, 2, 0, implicit $exec :: (load (s8) from %ir.p2 + 2, addrspace 3) +; CHECK-NEXT: [[Reg30:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[Reg3]]:vgpr_32, 3, 0, implicit $exec :: (load (s8) from %ir.p2 + 3, addrspace 3) +; CHECK-NEXT: [[Reg31:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg28]]:vgpr_32, 8, killed [[Reg27]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg32:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg30]]:vgpr_32, 8, killed [[Reg29]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg33:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg32]]:vgpr_32, 16, killed [[Reg31]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg34:%[0-9]+]]:sreg_32 = SI_IF killed [[Reg11]]:sreg_32, %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.5 +; EMPTY: +; CHECK: bb.3.Flow3: +; CHECK-NEXT: ; predecessors: %bb.2, %bb.5 +; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.6(0x40000000); %bb.4(50.00%), %bb.6(50.00%) +; EMPTY: +; CHECK: [[Reg35:%[0-9]+]]:vgpr_32 = PHI undef [[Reg36:%[0-9]+]]:vgpr_32, %bb.2, [[Reg37:%[0-9]+]]:vgpr_32, %bb.5 +; CHECK-NEXT: [[Reg38:%[0-9]+]]:vgpr_32 = PHI [[Reg26]]:vgpr_32, %bb.2, undef [[Reg39:%[0-9]+]]:vgpr_32, %bb.5 +; CHECK-NEXT: [[Reg40:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg20]]:vgpr_32, 8, killed [[Reg19]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg41:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed 
[[Reg22]]:vgpr_32, 8, killed [[Reg21]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg42:%[0-9]+]]:sreg_32 = SI_ELSE killed [[Reg34]]:sreg_32, %bb.6, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.4 +; EMPTY: +; CHECK: bb.4.bb3: +; CHECK-NEXT: ; predecessors: %bb.3 +; CHECK-NEXT: successors: %bb.6(0x80000000); %bb.6(100.00%) +; EMPTY: +; CHECK: [[Reg43:%[0-9]+]]:vreg_64, $sgpr_null = V_MAD_U64_U32_e64 [[Reg18]]:vgpr_32, killed [[Reg38]]:vgpr_32, 1000, 0, implicit $exec +; CHECK-NEXT: [[Reg44:%[0-9]+]]:vgpr_32 = COPY killed [[Reg43]].sub0:vreg_64 +; CHECK-NEXT: S_BRANCH %bb.6 +; EMPTY: +; CHECK: bb.5.bb4: +; CHECK-NEXT: ; predecessors: %bb.2 +; CHECK-NEXT: successors: %bb.3(0x80000000); %bb.3(100.00%) +; EMPTY: +; CHECK: [[Reg37]]:vgpr_32 = V_ADD_U32_e64 killed [[Reg23]]:vgpr_32, [[Reg33]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.3 +; EMPTY: +; CHECK: bb.6.bb5: +; CHECK-NEXT: ; predecessors: %bb.3, %bb.4 +; CHECK-NEXT: successors: %bb.12(0x40000000), %bb.7(0x40000000); %bb.12(50.00%), %bb.7(50.00%) +; EMPTY: +; CHECK: [[Reg45:%[0-9]+]]:vgpr_32 = PHI [[Reg35]]:vgpr_32, %bb.3, [[Reg44]]:vgpr_32, %bb.4 +; CHECK-NEXT: SI_END_CF killed [[Reg42]]:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: [[Reg46:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg41]]:vgpr_32, 16, killed [[Reg40]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg47:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 killed [[Reg3]]:vgpr_32, 12, 0, implicit $exec :: (load (s32) from %ir.gep2, align 8, addrspace 3) +; CHECK-NEXT: [[Reg48:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[Reg47]]:vgpr_32, killed [[Reg45]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg49:%[0-9]+]]:sreg_32 = S_XOR_B32 [[Reg8]]:sreg_32, [[Reg10]]:sreg_32, implicit-def dead $scc +; CHECK-NEXT: [[Reg50:%[0-9]+]]:sreg_32 = S_XOR_B32 killed [[Reg49]]:sreg_32, -1, implicit-def dead $scc +; CHECK-NEXT: [[Reg51:%[0-9]+]]:sreg_32 = SI_IF killed [[Reg50]]:sreg_32, %bb.7, 
implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.12 +; EMPTY: +; CHECK: bb.7.Flow2: +; CHECK-NEXT: ; predecessors: %bb.6, %bb.12 +; CHECK-NEXT: successors: %bb.8(0x40000000), %bb.14(0x40000000); %bb.8(50.00%), %bb.14(50.00%) +; EMPTY: +; CHECK: [[Reg52:%[0-9]+]]:vgpr_32 = PHI undef [[Reg53:%[0-9]+]]:vgpr_32, %bb.6, [[Reg54:%[0-9]+]]:vgpr_32, %bb.12 +; CHECK-NEXT: [[Reg55:%[0-9]+]]:vgpr_32 = PHI [[Reg46]]:vgpr_32, %bb.6, undef [[Reg56:%[0-9]+]]:vgpr_32, %bb.12 +; CHECK-NEXT: [[Reg57:%[0-9]+]]:vgpr_32 = PHI [[Reg48]]:vgpr_32, %bb.6, undef [[Reg58:%[0-9]+]]:vgpr_32, %bb.12 +; CHECK-NEXT: [[Reg59:%[0-9]+]]:sreg_32 = SI_ELSE killed [[Reg51]]:sreg_32, %bb.14, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.8 +; EMPTY: +; CHECK: bb.8.bb6: +; CHECK-NEXT: ; predecessors: %bb.7 +; CHECK-NEXT: successors: %bb.11(0x40000000), %bb.9(0x40000000); %bb.11(50.00%), %bb.9(50.00%) +; EMPTY: +; CHECK: [[Reg60:%[0-9]+]]:sreg_32 = S_AND_B32 killed [[Reg8]]:sreg_32, killed [[Reg10]]:sreg_32, implicit-def dead $scc +; CHECK-NEXT: [[Reg61:%[0-9]+]]:sreg_32 = S_XOR_B32 killed [[Reg60]]:sreg_32, -1, implicit-def dead $scc +; CHECK-NEXT: [[Reg62:%[0-9]+]]:sreg_32 = SI_IF killed [[Reg61]]:sreg_32, %bb.9, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.11 +; EMPTY: +; CHECK: bb.9.Flow: +; CHECK-NEXT: ; predecessors: %bb.8, %bb.11 +; CHECK-NEXT: successors: %bb.10(0x40000000), %bb.13(0x40000000); %bb.10(50.00%), %bb.13(50.00%) +; EMPTY: +; CHECK: [[Reg63:%[0-9]+]]:vgpr_32 = PHI undef [[Reg64:%[0-9]+]]:vgpr_32, %bb.8, [[Reg65:%[0-9]+]]:vgpr_32, %bb.11 +; CHECK-NEXT: [[Reg66:%[0-9]+]]:vgpr_32 = PHI [[Reg55]]:vgpr_32, %bb.8, undef [[Reg67:%[0-9]+]]:vgpr_32, %bb.11 +; CHECK-NEXT: [[Reg68:%[0-9]+]]:vgpr_32 = PHI [[Reg57]]:vgpr_32, %bb.8, undef [[Reg69:%[0-9]+]]:vgpr_32, %bb.11 +; CHECK-NEXT: [[Reg70:%[0-9]+]]:sreg_32 = SI_ELSE killed [[Reg62]]:sreg_32, %bb.13, 
implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.10 +; EMPTY: +; CHECK: bb.10.bb8: +; CHECK-NEXT: ; predecessors: %bb.9 +; CHECK-NEXT: successors: %bb.13(0x80000000); %bb.13(100.00%) +; EMPTY: +; CHECK: [[Reg71:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[Reg66]]:vgpr_32, killed [[Reg68]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.13 +; EMPTY: +; CHECK: bb.11.bb9: +; CHECK-NEXT: ; predecessors: %bb.8 +; CHECK-NEXT: successors: %bb.9(0x80000000); %bb.9(100.00%) +; EMPTY: +; CHECK: [[Reg65]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg55]]:vgpr_32, killed [[Reg57]]:vgpr_32, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.9 +; EMPTY: +; CHECK: bb.12.bb7: +; CHECK-NEXT: ; predecessors: %bb.6 +; CHECK-NEXT: successors: %bb.7(0x80000000); %bb.7(100.00%) +; EMPTY: +; CHECK: [[Reg72:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 killed [[Reg47]]:vgpr_32, killed [[Reg48]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg73:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg72]]:vgpr_32, [[Reg33]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg74:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[Reg18]]:vgpr_32, 0, 0, implicit $mode, implicit $exec +; CHECK-NEXT: [[Reg75:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_IFLAG_F32_e64 0, killed [[Reg74]]:vgpr_32, 0, 0, implicit $mode, implicit $exec +; CHECK-NEXT: [[Reg76:%[0-9]+]]:vgpr_32 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_MUL_F32_e64 0, 1333788670, 0, killed [[Reg75]]:vgpr_32, 0, 0, implicit $mode, implicit $exec +; CHECK-NEXT: [[Reg77:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, killed [[Reg76]]:vgpr_32, 0, 0, implicit $mode, implicit $exec +; CHECK-NEXT: [[Reg78:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 0, [[Reg18]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg79:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg78]]:vgpr_32, [[Reg77]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg80:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[Reg77]]:vgpr_32, killed [[Reg79]]:vgpr_32, implicit $exec +; CHECK-NEXT: 
[[Reg81:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[Reg77]]:vgpr_32, killed [[Reg80]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg82:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[Reg73]]:vgpr_32, killed [[Reg81]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg83:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[Reg82]]:vgpr_32, [[Reg18]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg84:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 killed [[Reg73]]:vgpr_32, killed [[Reg83]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg85:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_GE_U32_e64 [[Reg84]]:vgpr_32, [[Reg18]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg86:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 1, [[Reg82]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg87:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, killed [[Reg82]]:vgpr_32, 0, killed [[Reg86]]:vgpr_32, [[Reg85]]:sreg_32_xm0_xexec, implicit $exec +; CHECK-NEXT: [[Reg88:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[Reg84]]:vgpr_32, [[Reg18]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg89:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, killed [[Reg84]]:vgpr_32, 0, killed [[Reg88]]:vgpr_32, killed [[Reg85]]:sreg_32_xm0_xexec, implicit $exec +; CHECK-NEXT: [[Reg90:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_GE_U32_e64 killed [[Reg89]]:vgpr_32, killed [[Reg18]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg91:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 1, [[Reg87]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg92:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, killed [[Reg87]]:vgpr_32, 0, killed [[Reg91]]:vgpr_32, killed [[Reg90]]:sreg_32_xm0_xexec, implicit $exec +; CHECK-NEXT: [[Reg54]]:vgpr_32 = V_ADD_U32_e64 killed [[Reg92]]:vgpr_32, killed [[Reg46]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.7 +; EMPTY: +; CHECK: bb.13.Flow1: +; CHECK-NEXT: ; predecessors: %bb.9, %bb.10 +; CHECK-NEXT: successors: %bb.14(0x80000000); %bb.14(100.00%) +; EMPTY: +; CHECK: [[Reg93:%[0-9]+]]:vgpr_32 = PHI [[Reg63]]:vgpr_32, %bb.9, [[Reg71]]:vgpr_32, %bb.10 +; CHECK-NEXT: SI_END_CF killed [[Reg70]]:sreg_32, implicit-def 
dead $exec, implicit-def dead $scc, implicit $exec +; EMPTY: +; CHECK: bb.14.exit: +; CHECK-NEXT: ; predecessors: %bb.7, %bb.13 +; EMPTY: +; CHECK: [[Reg94:%[0-9]+]]:vgpr_32 = PHI [[Reg52]]:vgpr_32, %bb.7, [[Reg93]]:vgpr_32, %bb.13 +; CHECK-NEXT: SI_END_CF killed [[Reg59]]:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: [[Reg95:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 killed [[Reg33]]:vgpr_32, killed [[Reg94]]:vgpr_32, 100, implicit $exec +; CHECK-NEXT: [[Reg96:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[Reg95]]:vgpr_32, implicit $exec +; CHECK-NEXT: $sgpr0 = COPY killed [[Reg96]]:sreg_32_xm0 +; CHECK-NEXT: SI_RETURN_TO_EPILOG killed $sgpr0 +; EMPTY: +; CHECK: # End machine code for function test. +; EMPTY: +; CHECK: Next-use distance of Register [[Reg5]] = 8.0 +; CHECK-NEXT: Next-use distance of Register [[Reg4]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg3]] = 25.0 +; CHECK-NEXT: Next-use distance of Register [[Reg2]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg1]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg6]] = 6.0 +; CHECK-NEXT: Next-use distance of Register [[Reg7]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg8]] = 16.0 +; CHECK-NEXT: Next-use distance of Register [[Reg9]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg10]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg11]] = 24.0 +; CHECK-NEXT: Next-use distance of Register [[Reg12]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg13]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg14]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg15]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg16]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg17]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg18]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg19]] = 20.0 +; CHECK-NEXT: Next-use distance of Register [[Reg20]] = 19.0 +; CHECK-NEXT: Next-use 
distance of Register [[Reg21]] = 19.0 +; CHECK-NEXT: Next-use distance of Register [[Reg22]] = 18.0 +; CHECK-NEXT: Next-use distance of Register [[Reg23]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg24]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg25]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg26]] = 12.0 +; CHECK-NEXT: Next-use distance of Register [[Reg27]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg28]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg29]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg30]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg31]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg32]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg33]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg34]] = 6.0 +; CHECK-NEXT: Next-use distance of Register [[Reg35]] = 6.0 +; CHECK-NEXT: Next-use distance of Register [[Reg38]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg40]] = 6.0 +; CHECK-NEXT: Next-use distance of Register [[Reg41]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg42]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg43]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg44]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg37]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg45]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg46]] = 8.0 +; CHECK-NEXT: Next-use distance of Register [[Reg47]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg48]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg49]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg50]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg51]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg52]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg55]] = 8.0 +; CHECK-NEXT: Next-use distance of Register [[Reg57]] = 7.0 +; CHECK-NEXT: Next-use distance of Register [[Reg59]] = 3.0 +; 
CHECK-NEXT: Next-use distance of Register [[Reg60]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg61]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg62]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg63]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg66]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg68]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg70]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg71]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg65]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg72]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg73]] = 9.0 +; CHECK-NEXT: Next-use distance of Register [[Reg74]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg75]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg76]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg77]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg78]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg79]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg80]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg81]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg82]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg83]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg84]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg85]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg86]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg87]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg88]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg89]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg90]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg91]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg92]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg54]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg93]] = 2.0 +; CHECK-NEXT: Next-use distance of Register 
[[Reg94]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg95]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg96]] = 1.0 +entry: +; entry +; / \ +; bb1 | +; \ | +; bb2 +; / \ +; bb3 bb4 +; \ / +; bb5 +; / \ +; bb6 bb7 +; / \ | +; bb8 bb9 | +; \ | | +; \ | / +; exit + %ld1 = load i32, ptr addrspace(1) %p1, align 1 + %gep1 = getelementptr inbounds i32, ptr addrspace(1) %p1, i64 3 + %ld2 = load i32, ptr addrspace(1) %gep1, align 1 + %add1 = add i32 %ld1, 100 + br i1 %cond1, label %bb1, label %bb2 + +bb1: + br label %bb2 + +bb2: + %phi0 = phi i32 [ 100, %bb1 ], [ %add1, %entry ] + %ld3 = load i32, ptr addrspace(3) %p2, align 1 + %add2 = add i32 %ld3, 100 + br i1 %cond2, label %bb3, label %bb4 + +bb3: + %mul1 = mul i32 %ld1, %phi0 + %add3 = add i32 %mul1, 1000 + br label %bb5 + +bb4: + %add4 = add i32 %add2, %ld1 + br label %bb5 + +bb5: + %phi1 = phi i32 [ %add3, %bb3 ], [ %add4, %bb4] + %gep2 = getelementptr inbounds i32, ptr addrspace(3) %p2, i64 3 + %ld4 = load i32, ptr addrspace(3) %gep2, align 8 + %add5 = add i32 %ld4, %phi1 + %xor = xor i1 %cond1, %cond2 + br i1 %xor, label %bb6, label %bb7 + +bb6: + %and = and i1 %cond1, %cond2 + br i1 %and, label %bb8, label %bb9 + +bb8: + %add6 = add i32 %ld2, %add5 + br label %exit + +bb9: + %mul2 = mul i32 %ld2, %add5 + br label %exit + +bb7: + %sub1 = sub i32 %ld4, %add5 + %mul3 = mul i32 %sub1, %ld3 + %div = udiv i32 %mul3, %ld1 + %add7 = add i32 %div, %ld2 + br label %exit + +exit: + %phi2 = phi i32 [ %add6, %bb8 ], [ %mul2, %bb9], [ %add7, %bb7 ] + %add8 = add i32 %add2, %phi2 + ret i32 %add8 +} diff --git a/llvm/test/CodeGen/AMDGPU/test_ers_emit_restore_in_loop_preheader1.ll b/llvm/test/CodeGen/AMDGPU/test_ers_emit_restore_in_loop_preheader1.ll new file mode 100644 index 0000000000000..d5b724311eb5e --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/test_ers_emit_restore_in_loop_preheader1.ll @@ -0,0 +1,182 @@ +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -enable-next-use-analysis=true -verify-machineinstrs
-dump-next-use-distance < %s 2>&1 | FileCheck %s + +; +; bb.0.entry +; | +; +<-----+ +; bb.1.loop | +; +------+ +; | +; bb.2.exit +; +define amdgpu_ps void @test(ptr addrspace(1) %p1, ptr addrspace(1) %p2, ptr addrspace(1) %p3, ptr addrspace(1) %p4, ptr addrspace(1) %p5, i32 %TC) { +; CHECK-LABEL: # Machine code for function test: IsSSA, TracksLiveness +; CHECK-NEXT: Function Live Ins: $vgpr0 in [[Reg1:%[0-9]+]], $vgpr1 in [[Reg2:%[0-9]+]], $vgpr2 in [[Reg3:%[0-9]+]], $vgpr3 in [[Reg4:%[0-9]+]], $vgpr4 in [[Reg5:%[0-9]+]], $vgpr5 in [[Reg6:%[0-9]+]], $vgpr6 in [[Reg7:%[0-9]+]], $vgpr7 in [[Reg8:%[0-9]+]], $vgpr8 in [[Reg9:%[0-9]+]], $vgpr9 in [[Reg10:%[0-9]+]], $vgpr10 in [[Reg11:%[0-9]+]] +; EMPTY: +; CHECK: bb.0.entry: +; CHECK-NEXT: successors: %bb.1(0x80000000); %bb.1(100.00%) +; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 +; CHECK-NEXT: [[Reg11]]:vgpr_32 = COPY killed $vgpr10 +; CHECK-NEXT: [[Reg10]]:vgpr_32 = COPY killed $vgpr9 +; CHECK-NEXT: [[Reg9]]:vgpr_32 = COPY killed $vgpr8 +; CHECK-NEXT: [[Reg8]]:vgpr_32 = COPY killed $vgpr7 +; CHECK-NEXT: [[Reg7]]:vgpr_32 = COPY killed $vgpr6 +; CHECK-NEXT: [[Reg6]]:vgpr_32 = COPY killed $vgpr5 +; CHECK-NEXT: [[Reg5]]:vgpr_32 = COPY killed $vgpr4 +; CHECK-NEXT: [[Reg4]]:vgpr_32 = COPY killed $vgpr3 +; CHECK-NEXT: [[Reg3]]:vgpr_32 = COPY killed $vgpr2 +; CHECK-NEXT: [[Reg2]]:vgpr_32 = COPY killed $vgpr1 +; CHECK-NEXT: [[Reg1]]:vgpr_32 = COPY killed $vgpr0 +; CHECK-NEXT: [[Reg12:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg9]]:vgpr_32, %subreg.sub0, killed [[Reg10]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg13:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg1]]:vgpr_32, %subreg.sub0, killed [[Reg2]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg14:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg7]]:vgpr_32, %subreg.sub0, killed [[Reg8]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg15:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg5]]:vgpr_32, %subreg.sub0, killed 
[[Reg6]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg16:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg3]]:vgpr_32, %subreg.sub0, killed [[Reg4]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg17:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[Reg16]]:vreg_64, 0, 0, implicit $exec :: (load (s32) from %ir.p2, addrspace 1) +; CHECK-NEXT: [[Reg18:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg15]]:vreg_64, 0, 0, implicit $exec :: (load (s8) from %ir.p3, addrspace 1) +; CHECK-NEXT: [[Reg19:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg15]]:vreg_64, 1, 0, implicit $exec :: (load (s8) from %ir.p3 + 1, addrspace 1) +; CHECK-NEXT: [[Reg20:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg19]]:vgpr_32, 8, killed [[Reg18]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg21:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg15]]:vreg_64, 2, 0, implicit $exec :: (load (s8) from %ir.p3 + 2, addrspace 1) +; CHECK-NEXT: [[Reg22:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg15]]:vreg_64, 3, 0, implicit $exec :: (load (s8) from %ir.p3 + 3, addrspace 1) +; CHECK-NEXT: [[Reg23:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg22]]:vgpr_32, 8, killed [[Reg21]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg24:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg23]]:vgpr_32, 16, killed [[Reg20]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg25:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[Reg14]]:vreg_64, 0, 0, implicit $exec :: (load (s32) from %ir.p4, addrspace 1) +; CHECK-NEXT: [[Reg26:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[Reg25]]:vgpr_32, [[Reg11]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_DWORD killed [[Reg16]]:vreg_64, [[Reg26]]:vgpr_32, 0, 0, implicit $exec :: (store (s32) into %ir.p2, addrspace 1) +; CHECK-NEXT: [[Reg27:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg25]]:vgpr_32, [[Reg26]]:vgpr_32, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_DWORD killed [[Reg15]]:vreg_64, [[Reg27]]:vgpr_32, 0, 0, implicit $exec :: (store (s32) into %ir.p3, addrspace 1) +; CHECK-NEXT: [[Reg28:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 killed 
[[Reg27]]:vgpr_32, killed [[Reg26]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg29:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg28]]:vgpr_32, [[Reg11]]:vgpr_32, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_DWORD killed [[Reg14]]:vreg_64, killed [[Reg29]]:vgpr_32, 0, 0, implicit $exec :: (store (s32) into %ir.p4, addrspace 1) +; CHECK-NEXT: [[Reg30:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 100, implicit $exec +; CHECK-NEXT: [[Reg31:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; EMPTY: +; CHECK: bb.1.loop: +; CHECK-NEXT: ; predecessors: %bb.0, %bb.1 +; CHECK-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000); %bb.2(3.12%), %bb.1(96.88%) +; EMPTY: +; CHECK: [[Reg32:%[0-9]+]]:sreg_32 = PHI [[Reg31]]:sreg_32, %bb.0, [[Reg33:%[0-9]+]]:sreg_32, %bb.1 +; CHECK-NEXT: [[Reg34:%[0-9]+]]:vgpr_32 = PHI [[Reg30]]:vgpr_32, %bb.0, [[Reg35:%[0-9]+]]:vgpr_32, %bb.1 +; CHECK-NEXT: [[Reg36:%[0-9]+]]:sreg_32 = PHI [[Reg31]]:sreg_32, %bb.0, [[Reg37:%[0-9]+]]:sreg_32, %bb.1 +; CHECK-NEXT: [[Reg38:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[Reg36]]:sreg_32, 31, implicit-def dead $scc +; CHECK-NEXT: [[Reg39:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[Reg36]]:sreg_32, %subreg.sub0, killed [[Reg38]]:sreg_32_xm0, %subreg.sub1 +; CHECK-NEXT: [[Reg40:%[0-9]+]]:sreg_64 = nsw S_LSHL_B64 killed [[Reg39]]:sreg_64, 2, implicit-def dead $scc +; CHECK-NEXT: [[Reg41:%[0-9]+]]:vgpr_32, [[Reg42:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[Reg13]].sub0:vreg_64, [[Reg40]].sub0:sreg_64, 0, implicit $exec +; CHECK-NEXT: [[Reg43:%[0-9]+]]:vgpr_32, dead $sgpr_null = V_ADDC_U32_e64 killed [[Reg40]].sub1:sreg_64, [[Reg13]].sub1:vreg_64, killed [[Reg42]]:sreg_32_xm0_xexec, 0, implicit $exec +; CHECK-NEXT: [[Reg44:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg41]]:vgpr_32, %subreg.sub0, killed [[Reg43]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg45:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg44]]:vreg_64, 0, 0, implicit $exec :: (load (s8) from %ir.gep, addrspace 1) +; CHECK-NEXT: [[Reg46:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE 
[[Reg44]]:vreg_64, 1, 0, implicit $exec :: (load (s8) from %ir.gep + 1, addrspace 1) +; CHECK-NEXT: [[Reg47:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg46]]:vgpr_32, 8, killed [[Reg45]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg48:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg44]]:vreg_64, 2, 0, implicit $exec :: (load (s8) from %ir.gep + 2, addrspace 1) +; CHECK-NEXT: [[Reg49:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE killed [[Reg44]]:vreg_64, 3, 0, implicit $exec :: (load (s8) from %ir.gep + 3, addrspace 1) +; CHECK-NEXT: [[Reg50:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg49]]:vgpr_32, 8, killed [[Reg48]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg51:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg50]]:vgpr_32, 16, killed [[Reg47]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg35]]:vgpr_32 = V_ADD_U32_e64 [[Reg36]]:sreg_32, killed [[Reg51]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg37]]:sreg_32 = S_ADD_I32 killed [[Reg36]]:sreg_32, 1, implicit-def dead $scc +; CHECK-NEXT: [[Reg52:%[0-9]+]]:sreg_32 = V_CMP_GE_U32_e64 [[Reg37]]:sreg_32, [[Reg11]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg33]]:sreg_32 = SI_IF_BREAK killed [[Reg52]]:sreg_32, killed [[Reg32]]:sreg_32, implicit-def dead $scc +; CHECK-NEXT: SI_LOOP [[Reg33]]:sreg_32, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.2 +; EMPTY: +; CHECK: bb.2.exit: +; CHECK-NEXT: ; predecessors: %bb.1 +; EMPTY: +; CHECK: SI_END_CF killed [[Reg33]]:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: [[Reg53:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg24]]:vgpr_32, %subreg.sub0, undef [[Reg54:%[0-9]+]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg55:%[0-9]+]]:vreg_64, $sgpr_null = V_MAD_U64_U32_e64 killed [[Reg17]]:vgpr_32, killed [[Reg34]]:vgpr_32, killed [[Reg53]]:vreg_64, 0, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_DWORD killed [[Reg12]]:vreg_64, killed [[Reg55]].sub0:vreg_64, 0, 0, implicit $exec :: (store (s32) into 
%ir.p5, addrspace 1) +; CHECK-NEXT: S_ENDPGM 0 +; EMPTY: +; CHECK: # End machine code for function test. +; EMPTY: +; CHECK: Next-use distance of Register [[Reg11]] = 25.0 +; CHECK-NEXT: Next-use distance of Register [[Reg10]] = 10.0 +; CHECK-NEXT: Next-use distance of Register [[Reg9]] = 9.0 +; CHECK-NEXT: Next-use distance of Register [[Reg8]] = 10.0 +; CHECK-NEXT: Next-use distance of Register [[Reg7]] = 9.0 +; CHECK-NEXT: Next-use distance of Register [[Reg6]] = 9.0 +; CHECK-NEXT: Next-use distance of Register [[Reg5]] = 8.0 +; CHECK-NEXT: Next-use distance of Register [[Reg4]] = 8.0 +; CHECK-NEXT: Next-use distance of Register [[Reg3]] = 7.0 +; CHECK-NEXT: Next-use distance of Register [[Reg2]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg1]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg12]] = 22026.0 +; CHECK-NEXT: Next-use distance of Register [[Reg13]] = 28.0 +; CHECK-NEXT: Next-use distance of Register [[Reg14]] = 11.0 +; CHECK-NEXT: Next-use distance of Register [[Reg15]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg16]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg17]] = 22020.0 +; CHECK-NEXT: Next-use distance of Register [[Reg18]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg19]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg20]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg21]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg22]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg23]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg24]] = 22012.0 +; CHECK-NEXT: Next-use distance of Register [[Reg25]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg26]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg27]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg28]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg29]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg30]] = 3.0 +; CHECK-NEXT: Next-use distance of Register 
[[Reg31]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg32]] = 19.0 +; CHECK-NEXT: Next-use distance of Register [[Reg34]] = 22003.0 +; CHECK-NEXT: Next-use distance of Register [[Reg36]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg38]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg39]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg40]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg41]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg42]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg43]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg44]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg45]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg46]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg47]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg48]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg49]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg50]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg51]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg35]] = 7.0 +; CHECK-NEXT: Next-use distance of Register [[Reg37]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg52]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg33]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg53]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg55]] = 1.0 +entry: +; entry +; | +; +<----+ +; loop | +; +-----+ +; | +; exit + %ld1 = load i32, ptr addrspace(1) %p2, align 4 + %ld2 = load i32, ptr addrspace(1) %p3, align 1 + %ld3 = load i32, ptr addrspace(1) %p4 + %add1 = add i32 %ld3, %TC + store i32 %add1, ptr addrspace(1) %p2 + %mul1 = mul i32 %ld3, %add1 + store i32 %mul1, ptr addrspace(1) %p3 + %sub1 = sub i32 %mul1, %add1 + %mul2 = mul i32 %sub1, %TC + store i32 %mul2, ptr addrspace(1) %p4 + br label %loop + +loop: + %phi = phi i32 [ 100, %entry ], [ %add, %loop ] + %phi.inc = phi i32 [ 0, %entry ], [ %inc, %loop ] + 
%sext = sext i32 %phi.inc to i64 + %gep = getelementptr inbounds i32, ptr addrspace(1) %p1, i64 %sext + %ld = load i32, ptr addrspace(1) %gep, align 1 + %add = add i32 %ld, %phi.inc + %inc = add i32 %phi.inc, 1 + %cond = icmp ult i32 %inc, %TC + br i1 %cond, label %loop, label %exit + +exit: + %mul3 = mul i32 %ld1, %phi + %add2 = add i32 %mul3, %ld2 + store i32 %add2, ptr addrspace(1) %p5 + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/test_ers_emit_restore_in_loop_preheader2.ll b/llvm/test/CodeGen/AMDGPU/test_ers_emit_restore_in_loop_preheader2.ll new file mode 100644 index 0000000000000..02b65647a800a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/test_ers_emit_restore_in_loop_preheader2.ll @@ -0,0 +1,248 @@ +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -enable-next-use-analysis=true -verify-machineinstrs -dump-next-use-distance < %s 2>&1 | FileCheck %s + +; +; bb.0.entry +; | +; +<--------+ +; bb.1.loop1 | +; +---------+ +; | +; bb.2.bb +; | +; +<--------+ +; bb.3.loop2 | +; +---------+ +; | +; bb.4.exit +; +define amdgpu_ps i32 @test(ptr addrspace(1) %p1, ptr addrspace(1) %p2, ptr addrspace(1) %p3, i32 %TC1, i32 %TC2) { +; CHECK-LABEL: # Machine code for function test: IsSSA, TracksLiveness +; CHECK-NEXT: Function Live Ins: $vgpr0 in [[Reg1:%[0-9]+]], $vgpr1 in [[Reg2:%[0-9]+]], $vgpr2 in [[Reg3:%[0-9]+]], $vgpr3 in [[Reg4:%[0-9]+]], $vgpr4 in [[Reg5:%[0-9]+]], $vgpr5 in [[Reg6:%[0-9]+]], $vgpr6 in [[Reg7:%[0-9]+]], $vgpr7 in [[Reg8:%[0-9]+]] +; EMPTY: +; CHECK: bb.0.entry: +; CHECK-NEXT: successors: %bb.1(0x80000000); %bb.1(100.00%) +; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7 +; CHECK-NEXT: [[Reg8]]:vgpr_32 = COPY killed $vgpr7 +; CHECK-NEXT: [[Reg7]]:vgpr_32 = COPY killed $vgpr6 +; CHECK-NEXT: [[Reg6]]:vgpr_32 = COPY killed $vgpr5 +; CHECK-NEXT: [[Reg5]]:vgpr_32 = COPY killed $vgpr4 +; CHECK-NEXT: [[Reg4]]:vgpr_32 = COPY killed $vgpr3 +; CHECK-NEXT: [[Reg3]]:vgpr_32 = COPY killed $vgpr2 +; CHECK-NEXT: [[Reg2]]:vgpr_32 
= COPY killed $vgpr1 +; CHECK-NEXT: [[Reg1]]:vgpr_32 = COPY killed $vgpr0 +; CHECK-NEXT: [[Reg9:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg5]]:vgpr_32, %subreg.sub0, killed [[Reg6]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg10:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg3]]:vgpr_32, %subreg.sub0, killed [[Reg4]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg11:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg1]]:vgpr_32, %subreg.sub0, killed [[Reg2]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg12:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg11]]:vreg_64, 0, 0, implicit $exec :: (load (s8) from %ir.p1, addrspace 1) +; CHECK-NEXT: [[Reg13:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg11]]:vreg_64, 1, 0, implicit $exec :: (load (s8) from %ir.p1 + 1, addrspace 1) +; CHECK-NEXT: [[Reg14:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg13]]:vgpr_32, 8, killed [[Reg12]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg15:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg11]]:vreg_64, 2, 0, implicit $exec :: (load (s8) from %ir.p1 + 2, addrspace 1) +; CHECK-NEXT: [[Reg16:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg11]]:vreg_64, 3, 0, implicit $exec :: (load (s8) from %ir.p1 + 3, addrspace 1) +; CHECK-NEXT: [[Reg17:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg16]]:vgpr_32, 8, killed [[Reg15]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg18:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg17]]:vgpr_32, 16, killed [[Reg14]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg19:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 100, implicit $exec +; CHECK-NEXT: [[Reg20:%[0-9]+]]:sreg_32 = S_MOV_B32 1 +; CHECK-NEXT: [[Reg21:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; EMPTY: +; CHECK: bb.1.loop1: +; CHECK-NEXT: ; predecessors: %bb.0, %bb.1 +; CHECK-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000); %bb.2(3.12%), %bb.1(96.88%) +; EMPTY: +; CHECK: [[Reg22:%[0-9]+]]:sreg_32 = PHI [[Reg21]]:sreg_32, %bb.0, [[Reg23:%[0-9]+]]:sreg_32, %bb.1 +; CHECK-NEXT: [[Reg24:%[0-9]+]]:sreg_32 = PHI [[Reg21]]:sreg_32, %bb.0, 
[[Reg25:%[0-9]+]]:sreg_32, %bb.1 +; CHECK-NEXT: [[Reg26:%[0-9]+]]:sreg_32 = PHI [[Reg20]]:sreg_32, %bb.0, [[Reg27:%[0-9]+]]:sreg_32, %bb.1 +; CHECK-NEXT: [[Reg28:%[0-9]+]]:vgpr_32 = PHI [[Reg18]]:vgpr_32, %bb.0, [[Reg29:%[0-9]+]]:vgpr_32, %bb.1 +; CHECK-NEXT: [[Reg30:%[0-9]+]]:vgpr_32 = PHI [[Reg19]]:vgpr_32, %bb.0, [[Reg31:%[0-9]+]]:vgpr_32, %bb.1 +; CHECK-NEXT: [[Reg32:%[0-9]+]]:vgpr_32 = PHI [[Reg18]]:vgpr_32, %bb.0, [[Reg33:%[0-9]+]]:vgpr_32, %bb.1 +; CHECK-NEXT: [[Reg34:%[0-9]+]]:sreg_32 = S_ADD_I32 [[Reg26]]:sreg_32, -1, implicit-def dead $scc +; CHECK-NEXT: [[Reg35:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[Reg34]]:sreg_32, 31, implicit-def dead $scc +; CHECK-NEXT: [[Reg36:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[Reg34]]:sreg_32, %subreg.sub0, killed [[Reg35]]:sreg_32_xm0, %subreg.sub1 +; CHECK-NEXT: [[Reg37:%[0-9]+]]:sreg_64 = nsw S_LSHL_B64 killed [[Reg36]]:sreg_64, 2, implicit-def dead $scc +; CHECK-NEXT: [[Reg38:%[0-9]+]]:vgpr_32, [[Reg39:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[Reg10]].sub0:vreg_64, [[Reg37]].sub0:sreg_64, 0, implicit $exec +; CHECK-NEXT: [[Reg40:%[0-9]+]]:vgpr_32, dead $sgpr_null = V_ADDC_U32_e64 [[Reg37]].sub1:sreg_64, [[Reg10]].sub1:vreg_64, killed [[Reg39]]:sreg_32_xm0_xexec, 0, implicit $exec +; CHECK-NEXT: [[Reg41:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg38]]:vgpr_32, %subreg.sub0, killed [[Reg40]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg42:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD killed [[Reg41]]:vreg_64, 0, 0, implicit $exec :: (load (s32) from %ir.gep1, addrspace 1) +; CHECK-NEXT: [[Reg29]]:vgpr_32 = V_ADD_U32_e64 [[Reg26]]:sreg_32, [[Reg42]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg31]]:vgpr_32 = V_MUL_LO_U32_e64 [[Reg42]]:vgpr_32, [[Reg26]]:sreg_32, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_SHORT_D16_HI [[Reg11]]:vreg_64, [[Reg31]]:vgpr_32, 2, 0, implicit $exec :: (store (s16) into %ir.p1 + 2, addrspace 1) +; CHECK-NEXT: GLOBAL_STORE_SHORT [[Reg11]]:vreg_64, [[Reg31]]:vgpr_32, 0, 0, implicit $exec :: (store (s16) 
into %ir.p1, addrspace 1) +; CHECK-NEXT: [[Reg43:%[0-9]+]]:vgpr_32, [[Reg44:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[Reg9]].sub0:vreg_64, [[Reg37]].sub0:sreg_64, 0, implicit $exec +; CHECK-NEXT: [[Reg45:%[0-9]+]]:vgpr_32, dead $sgpr_null = V_ADDC_U32_e64 killed [[Reg37]].sub1:sreg_64, [[Reg9]].sub1:vreg_64, killed [[Reg44]]:sreg_32_xm0_xexec, 0, implicit $exec +; CHECK-NEXT: [[Reg46:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg43]]:vgpr_32, %subreg.sub0, killed [[Reg45]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg47:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[Reg46]]:vreg_64, 0, 0, implicit $exec :: (load (s16) from %ir.gep2, addrspace 1) +; CHECK-NEXT: [[Reg48:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT killed [[Reg46]]:vreg_64, 2, 0, implicit $exec :: (load (s16) from %ir.gep2 + 2, addrspace 1) +; CHECK-NEXT: [[Reg49:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg48]]:vgpr_32, 16, killed [[Reg47]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg27]]:sreg_32 = S_ADD_I32 [[Reg26]]:sreg_32, 1, implicit-def dead $scc +; CHECK-NEXT: [[Reg25]]:sreg_32 = S_ADD_I32 [[Reg24]]:sreg_32, -1, implicit-def dead $scc +; CHECK-NEXT: [[Reg33]]:vgpr_32 = V_ADD_U32_e64 killed [[Reg24]]:sreg_32, killed [[Reg49]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg50:%[0-9]+]]:sreg_32 = V_CMP_GE_U32_e64 killed [[Reg26]]:sreg_32, [[Reg7]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg23]]:sreg_32 = SI_IF_BREAK killed [[Reg50]]:sreg_32, killed [[Reg22]]:sreg_32, implicit-def dead $scc +; CHECK-NEXT: [[Reg51:%[0-9]+]]:vgpr_32 = COPY [[Reg27]]:sreg_32, implicit $exec +; CHECK-NEXT: SI_LOOP [[Reg23]]:sreg_32, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.2 +; EMPTY: +; CHECK: bb.2.bb: +; CHECK-NEXT: ; predecessors: %bb.1 +; CHECK-NEXT: successors: %bb.3(0x80000000); %bb.3(100.00%) +; EMPTY: +; CHECK: SI_END_CF killed [[Reg23]]:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: [[Reg52:%[0-9]+]]:vgpr_32 = 
V_ADD_U32_e64 -2, killed [[Reg51]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg53:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg31]]:vgpr_32, killed [[Reg52]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg54:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 100, killed [[Reg28]]:vgpr_32, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_DWORD [[Reg9]]:vreg_64, [[Reg54]]:vgpr_32, 0, 0, implicit $exec :: (store (s32) into %ir.p3, addrspace 1) +; CHECK-NEXT: [[Reg55:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; EMPTY: +; CHECK: bb.3.loop2: +; CHECK-NEXT: ; predecessors: %bb.2, %bb.3 +; CHECK-NEXT: successors: %bb.4(0x04000000), %bb.3(0x7c000000); %bb.4(3.12%), %bb.3(96.88%) +; EMPTY: +; CHECK: [[Reg56:%[0-9]+]]:sreg_32 = PHI [[Reg55]]:sreg_32, %bb.2, [[Reg57:%[0-9]+]]:sreg_32, %bb.3 +; CHECK-NEXT: [[Reg58:%[0-9]+]]:sreg_32 = PHI [[Reg55]]:sreg_32, %bb.2, [[Reg59:%[0-9]+]]:sreg_32, %bb.3 +; CHECK-NEXT: [[Reg60:%[0-9]+]]:vgpr_32 = PHI [[Reg32]]:vgpr_32, %bb.2, [[Reg53]]:vgpr_32, %bb.3 +; CHECK-NEXT: [[Reg59]]:sreg_32 = S_ADD_I32 killed [[Reg58]]:sreg_32, 2, implicit-def dead $scc +; CHECK-NEXT: [[Reg61:%[0-9]+]]:sreg_32 = V_CMP_GE_U32_e64 [[Reg59]]:sreg_32, [[Reg8]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg57]]:sreg_32 = SI_IF_BREAK killed [[Reg61]]:sreg_32, killed [[Reg56]]:sreg_32, implicit-def dead $scc +; CHECK-NEXT: [[Reg62:%[0-9]+]]:vgpr_32 = COPY [[Reg59]]:sreg_32, implicit $exec +; CHECK-NEXT: SI_LOOP [[Reg57]]:sreg_32, %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.4 +; EMPTY: +; CHECK: bb.4.exit: +; CHECK-NEXT: ; predecessors: %bb.3 +; EMPTY: +; CHECK: SI_END_CF killed [[Reg57]]:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_DWORD killed [[Reg9]]:vreg_64, [[Reg60]]:vgpr_32, 0, 0, implicit $exec :: (store (s32) into %ir.p3, addrspace 1) +; CHECK-NEXT: [[Reg63:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 killed [[Reg60]]:vgpr_32, killed [[Reg62]]:vgpr_32, killed [[Reg30]]:vgpr_32, implicit 
$exec +; CHECK-NEXT: [[Reg64:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 killed [[Reg63]]:vgpr_32, killed [[Reg54]]:vgpr_32, killed [[Reg42]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg65:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 killed [[Reg18]]:vgpr_32, killed [[Reg64]]:vgpr_32, 100, implicit $exec +; CHECK-NEXT: [[Reg66:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[Reg65]]:vgpr_32, implicit $exec +; CHECK-NEXT: $sgpr0 = COPY killed [[Reg66]]:sreg_32_xm0 +; CHECK-NEXT: SI_RETURN_TO_EPILOG killed $sgpr0 +; EMPTY: +; CHECK: # End machine code for function test. +; EMPTY: +; CHECK: Next-use distance of Register [[Reg8]] = 32031.0 +; CHECK-NEXT: Next-use distance of Register [[Reg7]] = 47.0 +; CHECK-NEXT: Next-use distance of Register [[Reg6]] = 6.0 +; CHECK-NEXT: Next-use distance of Register [[Reg5]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg4]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg3]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg2]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg1]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg9]] = 31.0 +; CHECK-NEXT: Next-use distance of Register [[Reg10]] = 22.0 +; CHECK-NEXT: Next-use distance of Register [[Reg11]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg12]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg13]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg14]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg15]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg16]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg17]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg18]] = 7.0 +; CHECK-NEXT: Next-use distance of Register [[Reg19]] = 7.0 +; CHECK-NEXT: Next-use distance of Register [[Reg20]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg21]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg22]] = 28.0 +; CHECK-NEXT: Next-use distance of Register [[Reg24]] = 24.0 +; CHECK-NEXT: Next-use 
distance of Register [[Reg26]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg28]] = 32004.0 +; CHECK-NEXT: Next-use distance of Register [[Reg30]] = 41009.0 +; CHECK-NEXT: Next-use distance of Register [[Reg32]] = 32009.0 +; CHECK-NEXT: Next-use distance of Register [[Reg34]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg35]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg36]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg37]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg38]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg39]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg40]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg41]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg42]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg29]] = 21.0 +; CHECK-NEXT: Next-use distance of Register [[Reg31]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg43]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg44]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg45]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg46]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg47]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg48]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg49]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg27]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg25]] = 8.0 +; CHECK-NEXT: Next-use distance of Register [[Reg33]] = 11.0 +; CHECK-NEXT: Next-use distance of Register [[Reg50]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg23]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg51]] = 32002.0 +; CHECK-NEXT: Next-use distance of Register [[Reg52]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg53]] = 6.0 +; CHECK-NEXT: Next-use distance of Register [[Reg54]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg55]] = 1.0 +; CHECK-NEXT: Next-use distance of Register 
[[Reg56]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg58]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg60]] = 9002.0 +; CHECK-NEXT: Next-use distance of Register [[Reg59]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg61]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg57]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg62]] = 9003.0 +; CHECK-NEXT: Next-use distance of Register [[Reg63]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg64]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg65]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg66]] = 1.0 +entry: +; entry +; | +; +<-----+ +; loop1 | +; +------+ +; | +; bb +; | +; +<-----+ +; loop2 | +; +------+ +; | +; exit + %ld1 = load i32, ptr addrspace(1) %p1, align 1 + %add1 = add i32 %ld1, 100 + br label %loop1 + +loop1: + %phi.inc1 = phi i32 [ 0, %entry ], [ %inc1, %loop1 ] + %phi1 = phi i32 [ %ld1, %entry ], [ %add2, %loop1 ] + %phi2 = phi i32 [ 100, %entry ], [ %mul1, %loop1 ] + %phi3 = phi i32 [ %ld1, %entry ], [ %sub, %loop1 ] + %sext1 = sext i32 %phi.inc1 to i64 + %gep1 = getelementptr inbounds i32, ptr addrspace(1) %p2, i64 %sext1 + %ld2 = load i32, ptr addrspace(1) %gep1, align 4 + %inc1 = add i32 %phi.inc1, 1 + %add2 = add i32 %ld2, %inc1 + %mul1 = mul i32 %ld2, %inc1 + store i32 %mul1, ptr addrspace(1) %p1, align 2 + %mul2 = mul i32 %mul1, %phi.inc1 + %sext2 = sext i32 %inc1 to i64 + %gep2 = getelementptr inbounds i32, ptr addrspace(1) %p3, i64 %sext1 + %ld3 = load i32, ptr addrspace(1) %gep2, align 2 + %sub = sub i32 %ld3, %phi.inc1 + %cond1 = icmp ult i32 %inc1, %TC1 + br i1 %cond1, label %loop1, label %bb + +bb: + %mul3 = mul i32 %phi1, 100 + store i32 %mul3, ptr addrspace(1) %p3 + br label %loop2 + +loop2: + %phi.inc2 = phi i32 [ 0, %bb ], [ %inc2, %loop2 ] + %phi4 = phi i32 [ %phi3, %bb ], [ %mul2, %loop2 ] + %inc2 = add i32 %phi.inc2, 2 + store i32 %phi4, ptr addrspace(1) %p3 + %add3 = add i32 %phi4, %inc2 + %cond2 = icmp 
ult i32 %inc2, %TC2 + br i1 %cond2, label %loop2, label %exit + +exit: + %add4 = add i32 %add3, %phi2 + %add5 = add i32 %add4, %mul3 + %add6 = add i32 %add5, %ld2 + %add7 = add i32 %add6, %add1 + ret i32 %add7 +} + diff --git a/llvm/test/CodeGen/AMDGPU/test_ers_emit_restore_in_loop_preheader3.ll b/llvm/test/CodeGen/AMDGPU/test_ers_emit_restore_in_loop_preheader3.ll new file mode 100644 index 0000000000000..38d511b3af02d --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/test_ers_emit_restore_in_loop_preheader3.ll @@ -0,0 +1,651 @@ +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -enable-next-use-analysis=true -verify-machineinstrs -dump-next-use-distance < %s 2>&1 | FileCheck %s + +; bb.0.entry +; / | +; bb.3.bb3 | +; \ | +; bb.1.Flow +; / | +; bb.2.bb2 | +; \ | +; bb.4.bb4 +; | +; bb.5.loop1.header<-------+ +; | | +; bb.6.loop2.header<-----+ | +; | | | +; bb.7.loop3.header<---+ | | +; / | | | | +; bb.8.bb5 | | | | +; \ | | | | +; bb.9.loop3.latch-----+ | | +; | | | +; bb.10.loop2.latch------+ | +; | | +; bb.11.loop4.preheader | +; | | +; bb.12.loop4<----+ | +; +----------+ | +; | | +; | | +; bb.13.loop1.latch--------+ +; | +; bb.14.bb6 +; / | +; bb.15.bb7 | +; \ | +; bb.16.loop5.preheader +; | +; +-->bb.17.loop5 +; +--------+ +; | +; bb.18.exit +define amdgpu_ps i32 @test (ptr addrspace(1) %p1, ptr addrspace(1) %p2, ptr addrspace(1) %p3, ptr addrspace(1) %p4, ptr addrspace(1) %p5, ptr addrspace(1) %p6, ptr addrspace(1) %p7, ptr addrspace(1) %p8, ptr addrspace(1) %p9, ptr addrspace(1) %p10, ptr addrspace(1) %p11, i32 %TC1, i32 %TC2, i32 %TC3, i32 %TC4, i32 %TC5, i32 %Val1, i32 %Val2, i1 %cond1) { +; CHECK-LABEL: # Machine code for function test: IsSSA, TracksLiveness +; CHECK-NEXT: Function Live Ins: $vgpr0 in [[Reg1:%[0-9]+]], $vgpr1 in [[Reg2:%[0-9]+]], $vgpr2 in [[Reg3:%[0-9]+]], $vgpr3 in [[Reg4:%[0-9]+]], $vgpr4 in [[Reg5:%[0-9]+]], $vgpr5 in [[Reg6:%[0-9]+]], $vgpr6 in [[Reg7:%[0-9]+]], $vgpr7 in [[Reg8:%[0-9]+]], $vgpr8 in [[Reg9:%[0-9]+]], $vgpr9 in [[Reg10:%[0-9]+]],
$vgpr10 in [[Reg11:%[0-9]+]], $vgpr11 in [[Reg12:%[0-9]+]], $vgpr12 in [[Reg13:%[0-9]+]], $vgpr13 in [[Reg14:%[0-9]+]], $vgpr14 in [[Reg15:%[0-9]+]], $vgpr15 in [[Reg16:%[0-9]+]], $vgpr16 in [[Reg17:%[0-9]+]], $vgpr17 in [[Reg18:%[0-9]+]], $vgpr18 in [[Reg19:%[0-9]+]], $vgpr19 in [[Reg20:%[0-9]+]], $vgpr20 in [[Reg21:%[0-9]+]], $vgpr21 in [[Reg22:%[0-9]+]], $vgpr22 in [[Reg23:%[0-9]+]], $vgpr23 in [[Reg24:%[0-9]+]], $vgpr24 in [[Reg25:%[0-9]+]], $vgpr25 in [[Reg26:%[0-9]+]], $vgpr26 in [[Reg27:%[0-9]+]], $vgpr27 in [[Reg28:%[0-9]+]] +; EMPTY: +; CHECK: bb.0.entry: +; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000); %bb.3(50.00%), %bb.1(50.00%) +; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27 +; CHECK-NEXT: [[Reg28]]:vgpr_32 = COPY killed $vgpr27 +; CHECK-NEXT: [[Reg27]]:vgpr_32 = COPY killed $vgpr26 +; CHECK-NEXT: [[Reg26]]:vgpr_32 = COPY killed $vgpr25 +; CHECK-NEXT: [[Reg25]]:vgpr_32 = COPY killed $vgpr24 +; CHECK-NEXT: [[Reg24]]:vgpr_32 = COPY killed $vgpr23 +; CHECK-NEXT: [[Reg23]]:vgpr_32 = COPY killed $vgpr22 +; CHECK-NEXT: [[Reg22]]:vgpr_32 = COPY killed $vgpr21 +; CHECK-NEXT: [[Reg21]]:vgpr_32 = COPY killed $vgpr20 +; CHECK-NEXT: [[Reg20]]:vgpr_32 = COPY killed $vgpr19 +; CHECK-NEXT: [[Reg19]]:vgpr_32 = COPY killed $vgpr18 +; CHECK-NEXT: [[Reg18]]:vgpr_32 = COPY killed $vgpr17 +; CHECK-NEXT: [[Reg17]]:vgpr_32 = COPY killed $vgpr16 +; CHECK-NEXT: [[Reg16]]:vgpr_32 = COPY killed $vgpr15 +; CHECK-NEXT: [[Reg15]]:vgpr_32 = COPY killed $vgpr14 +; CHECK-NEXT: [[Reg14]]:vgpr_32 = COPY killed $vgpr13 +; CHECK-NEXT: [[Reg13]]:vgpr_32 = COPY killed $vgpr12 +; CHECK-NEXT: [[Reg12]]:vgpr_32 = COPY killed $vgpr11 +; CHECK-NEXT: [[Reg11]]:vgpr_32 = COPY killed $vgpr10 +; CHECK-NEXT: [[Reg10]]:vgpr_32 = COPY killed $vgpr9 +; CHECK-NEXT: 
[[Reg9]]:vgpr_32 = COPY killed $vgpr8 +; CHECK-NEXT: [[Reg8]]:vgpr_32 = COPY killed $vgpr7 +; CHECK-NEXT: [[Reg7]]:vgpr_32 = COPY killed $vgpr6 +; CHECK-NEXT: [[Reg6]]:vgpr_32 = COPY killed $vgpr5 +; CHECK-NEXT: [[Reg5]]:vgpr_32 = COPY killed $vgpr4 +; CHECK-NEXT: [[Reg4]]:vgpr_32 = COPY killed $vgpr3 +; CHECK-NEXT: [[Reg3]]:vgpr_32 = COPY killed $vgpr2 +; CHECK-NEXT: [[Reg2]]:vgpr_32 = COPY killed $vgpr1 +; CHECK-NEXT: [[Reg1]]:vgpr_32 = COPY killed $vgpr0 +; CHECK-NEXT: [[Reg29:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg1]]:vgpr_32, %subreg.sub0, killed [[Reg2]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg30:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 1, killed [[Reg28]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg31:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 1, killed [[Reg30]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg32:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg29]]:vreg_64, 0, 0, implicit $exec :: (load (s8) from %ir.p1, addrspace 1) +; CHECK-NEXT: [[Reg33:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg29]]:vreg_64, 1, 0, implicit $exec :: (load (s8) from %ir.p1 + 1, addrspace 1) +; CHECK-NEXT: [[Reg34:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg33]]:vgpr_32, 8, killed [[Reg32]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg35:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg29]]:vreg_64, 2, 0, implicit $exec :: (load (s8) from %ir.p1 + 2, addrspace 1) +; CHECK-NEXT: [[Reg36:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg29]]:vreg_64, 3, 0, implicit $exec :: (load (s8) from %ir.p1 + 3, addrspace 1) +; CHECK-NEXT: [[Reg37:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg36]]:vgpr_32, 8, killed [[Reg35]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg38:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg37]]:vgpr_32, 16, killed [[Reg34]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg39:%[0-9]+]]:sreg_32 = SI_IF killed [[Reg31]]:sreg_32, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.3 +; EMPTY: +; CHECK: bb.1.Flow: +; CHECK-NEXT: ; 
predecessors: %bb.0, %bb.3 +; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000); %bb.2(50.00%), %bb.4(50.00%) +; EMPTY: +; CHECK: [[Reg40:%[0-9]+]]:sreg_32 = SI_ELSE killed [[Reg39]]:sreg_32, %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.2 +; EMPTY: +; CHECK: bb.2.bb2: +; CHECK-NEXT: ; predecessors: %bb.1 +; CHECK-NEXT: successors: %bb.4(0x80000000); %bb.4(100.00%) +; EMPTY: +; CHECK: [[Reg41:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[Reg26]]:vgpr_32, [[Reg38]]:vgpr_32, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_DWORD [[Reg29]]:vreg_64, killed [[Reg41]]:vgpr_32, 0, 0, implicit $exec :: (store (s32) into %ir.p1, addrspace 1) +; CHECK-NEXT: S_BRANCH %bb.4 +; EMPTY: +; CHECK: bb.3.bb3: +; CHECK-NEXT: ; predecessors: %bb.0 +; CHECK-NEXT: successors: %bb.1(0x80000000); %bb.1(100.00%) +; EMPTY: +; CHECK: [[Reg42:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[Reg26]]:vgpr_32, [[Reg38]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_SHORT_D16_HI [[Reg29]]:vreg_64, [[Reg42]]:vgpr_32, 2, 0, implicit $exec :: (store (s16) into %ir.p1 + 2, addrspace 1) +; CHECK-NEXT: GLOBAL_STORE_SHORT [[Reg29]]:vreg_64, killed [[Reg42]]:vgpr_32, 0, 0, implicit $exec :: (store (s16) into %ir.p1, addrspace 1) +; CHECK-NEXT: S_BRANCH %bb.1 +; EMPTY: +; CHECK: bb.4.bb4: +; CHECK-NEXT: ; predecessors: %bb.1, %bb.2 +; CHECK-NEXT: successors: %bb.5(0x80000000); %bb.5(100.00%) +; EMPTY: +; CHECK: SI_END_CF killed [[Reg40]]:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: [[Reg43:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg19]]:vgpr_32, %subreg.sub0, killed [[Reg20]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg44:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg17]]:vgpr_32, %subreg.sub0, killed [[Reg18]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg45:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg15]]:vgpr_32, %subreg.sub0, killed [[Reg16]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg46:%[0-9]+]]:vreg_64 = 
REG_SEQUENCE killed [[Reg13]]:vgpr_32, %subreg.sub0, killed [[Reg14]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg47:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg11]]:vgpr_32, %subreg.sub0, killed [[Reg12]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg48:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg9]]:vgpr_32, %subreg.sub0, killed [[Reg10]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg49:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg7]]:vgpr_32, %subreg.sub0, killed [[Reg8]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg50:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg5]]:vgpr_32, %subreg.sub0, killed [[Reg6]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg51:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg3]]:vgpr_32, %subreg.sub0, killed [[Reg4]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg52:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg29]]:vreg_64, 0, 0, implicit $exec :: (load (s8) from %ir.p1, addrspace 1) +; CHECK-NEXT: [[Reg53:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg29]]:vreg_64, 1, 0, implicit $exec :: (load (s8) from %ir.p1 + 1, addrspace 1) +; CHECK-NEXT: [[Reg54:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg53]]:vgpr_32, 8, killed [[Reg52]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg55:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg29]]:vreg_64, 2, 0, implicit $exec :: (load (s8) from %ir.p1 + 2, addrspace 1) +; CHECK-NEXT: [[Reg56:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg29]]:vreg_64, 3, 0, implicit $exec :: (load (s8) from %ir.p1 + 3, addrspace 1) +; CHECK-NEXT: [[Reg57:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg56]]:vgpr_32, 8, killed [[Reg55]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg58:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg57]]:vgpr_32, 16, killed [[Reg54]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg59:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 1, [[Reg38]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg60:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; CHECK-NEXT: [[Reg61:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec +; EMPTY: +; CHECK: bb.5.loop1.header: +; 
CHECK-NEXT: ; predecessors: %bb.4, %bb.13 +; CHECK-NEXT: successors: %bb.6(0x80000000); %bb.6(100.00%) +; EMPTY: +; CHECK: [[Reg62:%[0-9]+]]:sreg_32 = PHI [[Reg60]]:sreg_32, %bb.4, [[Reg63:%[0-9]+]]:sreg_32, %bb.13 +; CHECK-NEXT: [[Reg64:%[0-9]+]]:vgpr_32 = PHI [[Reg59]]:vgpr_32, %bb.4, [[Reg65:%[0-9]+]]:vgpr_32, %bb.13 +; CHECK-NEXT: [[Reg66:%[0-9]+]]:vgpr_32 = PHI [[Reg38]]:vgpr_32, %bb.4, [[Reg67:%[0-9]+]]:vgpr_32, %bb.13 +; CHECK-NEXT: [[Reg68:%[0-9]+]]:vgpr_32 = PHI [[Reg61]]:vgpr_32, %bb.4, [[Reg69:%[0-9]+]]:vgpr_32, %bb.13 +; CHECK-NEXT: [[Reg70:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 31, [[Reg66]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg71:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[Reg66]]:vgpr_32, %subreg.sub0, killed [[Reg70]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg72:%[0-9]+]]:vreg_64 = nsw V_LSHLREV_B64_pseudo_e64 3, killed [[Reg71]]:vreg_64, implicit $exec +; CHECK-NEXT: [[Reg73:%[0-9]+]]:vgpr_32, [[Reg74:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[Reg29]].sub0:vreg_64, [[Reg72]].sub0:vreg_64, 0, implicit $exec +; CHECK-NEXT: [[Reg75:%[0-9]+]]:vgpr_32, dead $sgpr_null = V_ADDC_U32_e64 [[Reg29]].sub1:vreg_64, killed [[Reg72]].sub1:vreg_64, killed [[Reg74]]:sreg_32_xm0_xexec, 0, implicit $exec +; CHECK-NEXT: [[Reg76:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg73]]:vgpr_32, %subreg.sub0, killed [[Reg75]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg77:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg76]]:vreg_64, 0, 0, implicit $exec :: (load (s8) from %ir.gep1, addrspace 1) +; CHECK-NEXT: [[Reg78:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg76]]:vreg_64, 1, 0, implicit $exec :: (load (s8) from %ir.gep1 + 1, addrspace 1) +; CHECK-NEXT: [[Reg79:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg78]]:vgpr_32, 8, killed [[Reg77]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg80:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg76]]:vreg_64, 2, 0, implicit $exec :: (load (s8) from %ir.gep1 + 2, addrspace 1) +; CHECK-NEXT: [[Reg81:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE killed 
[[Reg76]]:vreg_64, 3, 0, implicit $exec :: (load (s8) from %ir.gep1 + 3, addrspace 1) +; CHECK-NEXT: [[Reg82:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg81]]:vgpr_32, 8, killed [[Reg80]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg83:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg82]]:vgpr_32, 16, killed [[Reg79]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg84:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[Reg26]]:vgpr_32, [[Reg66]]:vgpr_32, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_SHORT [[Reg29]]:vreg_64, [[Reg84]]:vgpr_32, 0, 0, implicit $exec :: (store (s16) into %ir.p1, addrspace 1) +; CHECK-NEXT: GLOBAL_STORE_SHORT_D16_HI [[Reg29]]:vreg_64, [[Reg84]]:vgpr_32, 2, 0, implicit $exec :: (store (s16) into %ir.p1 + 2, addrspace 1) +; CHECK-NEXT: [[Reg85:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[Reg83]]:vgpr_32, [[Reg64]]:vgpr_32, implicit $exec +; EMPTY: +; CHECK: bb.6.loop2.header: +; CHECK-NEXT: ; predecessors: %bb.5, %bb.10 +; CHECK-NEXT: successors: %bb.7(0x80000000); %bb.7(100.00%) +; EMPTY: +; CHECK: [[Reg86:%[0-9]+]]:sreg_32 = PHI [[Reg60]]:sreg_32, %bb.5, [[Reg87:%[0-9]+]]:sreg_32, %bb.10 +; CHECK-NEXT: [[Reg88:%[0-9]+]]:vgpr_32 = PHI [[Reg85]]:vgpr_32, %bb.5, [[Reg89:%[0-9]+]]:vgpr_32, %bb.10 +; CHECK-NEXT: [[Reg90:%[0-9]+]]:vgpr_32 = PHI [[Reg83]]:vgpr_32, %bb.5, [[Reg91:%[0-9]+]]:vgpr_32, %bb.10 +; CHECK-NEXT: [[Reg92:%[0-9]+]]:vgpr_32 = PHI [[Reg66]]:vgpr_32, %bb.5, [[Reg93:%[0-9]+]]:vgpr_32, %bb.10 +; CHECK-NEXT: [[Reg94:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; EMPTY: +; CHECK: bb.7.loop3.header: +; CHECK-NEXT: ; predecessors: %bb.6, %bb.9 +; CHECK-NEXT: successors: %bb.8(0x40000000), %bb.9(0x40000000); %bb.8(50.00%), %bb.9(50.00%) +; EMPTY: +; CHECK: [[Reg95:%[0-9]+]]:sreg_32 = PHI [[Reg94]]:sreg_32, %bb.6, [[Reg96:%[0-9]+]]:sreg_32, %bb.9 +; CHECK-NEXT: [[Reg97:%[0-9]+]]:sreg_32 = PHI [[Reg94]]:sreg_32, %bb.6, [[Reg98:%[0-9]+]]:sreg_32, %bb.9 +; CHECK-NEXT: [[Reg99:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[Reg97]]:sreg_32, [[Reg90]]:vgpr_32, 0, implicit $exec +; 
CHECK-NEXT: [[Reg100:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg51]]:vreg_64, 0, 0, implicit $exec :: (load (s8) from %ir.p2, addrspace 1) +; CHECK-NEXT: [[Reg101:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg51]]:vreg_64, 1, 0, implicit $exec :: (load (s8) from %ir.p2 + 1, addrspace 1) +; CHECK-NEXT: [[Reg102:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg101]]:vgpr_32, 8, killed [[Reg100]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg103:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg51]]:vreg_64, 2, 0, implicit $exec :: (load (s8) from %ir.p2 + 2, addrspace 1) +; CHECK-NEXT: [[Reg104:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg51]]:vreg_64, 3, 0, implicit $exec :: (load (s8) from %ir.p2 + 3, addrspace 1) +; CHECK-NEXT: [[Reg105:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg104]]:vgpr_32, 8, killed [[Reg103]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg106:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg105]]:vgpr_32, 16, killed [[Reg102]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg107:%[0-9]+]]:sreg_32 = V_CMP_GE_U32_e64 [[Reg99]]:vgpr_32, [[Reg106]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg108:%[0-9]+]]:sreg_32 = SI_IF killed [[Reg107]]:sreg_32, %bb.9, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.8 +; EMPTY: +; CHECK: bb.8.bb5: +; CHECK-NEXT: ; predecessors: %bb.7 +; CHECK-NEXT: successors: %bb.9(0x80000000); %bb.9(100.00%) +; EMPTY: +; CHECK: [[Reg109:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[Reg97]]:sreg_32, [[Reg88]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg110:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg109]]:vgpr_32, killed [[Reg106]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg111:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[Reg110]]:vgpr_32, [[Reg84]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_DWORD [[Reg46]]:vreg_64, [[Reg111]]:vgpr_32, 0, 0, implicit $exec :: (store (s32) into %ir.p7, addrspace 1) +; EMPTY: +; CHECK: bb.9.loop3.latch: +; CHECK-NEXT: ; predecessors: %bb.7, %bb.8 +; CHECK-NEXT: 
successors: %bb.10(0x04000000), %bb.7(0x7c000000); %bb.10(3.12%), %bb.7(96.88%) +; EMPTY: +; CHECK: [[Reg69]]:vgpr_32 = PHI [[Reg83]]:vgpr_32, %bb.7, [[Reg111]]:vgpr_32, %bb.8 +; CHECK-NEXT: [[Reg112:%[0-9]+]]:vgpr_32 = PHI [[Reg99]]:vgpr_32, %bb.7, [[Reg110]]:vgpr_32, %bb.8 +; CHECK-NEXT: [[Reg93]]:vgpr_32 = PHI [[Reg99]]:vgpr_32, %bb.7, [[Reg90]]:vgpr_32, %bb.8 +; CHECK-NEXT: SI_END_CF killed [[Reg108]]:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_DWORD [[Reg45]]:vreg_64, [[Reg93]]:vgpr_32, 0, 0, implicit $exec :: (store (s32) into %ir.p9, addrspace 1) +; CHECK-NEXT: [[Reg113:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[Reg44]]:vreg_64, 0, 0, implicit $exec :: (load (s32) from %ir.p10, addrspace 1) +; CHECK-NEXT: [[Reg114:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[Reg112]]:vgpr_32, [[Reg69]]:vgpr_32, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_DWORD [[Reg44]]:vreg_64, [[Reg114]]:vgpr_32, 0, 0, implicit $exec :: (store (s32) into %ir.p10, addrspace 1) +; CHECK-NEXT: [[Reg98]]:sreg_32 = S_ADD_I32 killed [[Reg97]]:sreg_32, 1, implicit-def dead $scc +; CHECK-NEXT: [[Reg115:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[Reg98]]:sreg_32, [[Reg90]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg116:%[0-9]+]]:sreg_32 = V_CMP_GE_U32_e64 killed [[Reg115]]:vgpr_32, [[Reg23]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg96]]:sreg_32 = SI_IF_BREAK killed [[Reg116]]:sreg_32, killed [[Reg95]]:sreg_32, implicit-def dead $scc +; CHECK-NEXT: SI_LOOP [[Reg96]]:sreg_32, %bb.7, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.10 +; EMPTY: +; CHECK: bb.10.loop2.latch: +; CHECK-NEXT: ; predecessors: %bb.9 +; CHECK-NEXT: successors: %bb.11(0x04000000), %bb.6(0x7c000000); %bb.11(3.12%), %bb.6(96.88%) +; EMPTY: +; CHECK: SI_END_CF killed [[Reg96]]:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: [[Reg91]]:vgpr_32 = V_ADD_U32_e64 1, killed [[Reg90]]:vgpr_32, 0, implicit $exec 
+; CHECK-NEXT: [[Reg117:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[Reg43]]:vreg_64, 0, 0, implicit $exec :: (load (s32) from %ir.p11, addrspace 1) +; CHECK-NEXT: [[Reg118:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[Reg91]]:vgpr_32, [[Reg112]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_DWORD [[Reg43]]:vreg_64, [[Reg118]]:vgpr_32, 0, 0, implicit $exec :: (store (s32) into %ir.p11, addrspace 1) +; CHECK-NEXT: [[Reg89]]:vgpr_32 = V_ADD_U32_e64 killed [[Reg88]]:vgpr_32, [[Reg64]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg119:%[0-9]+]]:sreg_32 = V_CMP_GE_U32_e64 [[Reg91]]:vgpr_32, [[Reg22]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg87]]:sreg_32 = SI_IF_BREAK killed [[Reg119]]:sreg_32, killed [[Reg86]]:sreg_32, implicit-def dead $scc +; CHECK-NEXT: SI_LOOP [[Reg87]]:sreg_32, %bb.6, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.11 +; EMPTY: +; CHECK: bb.11.loop4.preheader: +; CHECK-NEXT: ; predecessors: %bb.10 +; CHECK-NEXT: successors: %bb.12(0x80000000); %bb.12(100.00%) +; EMPTY: +; CHECK: SI_END_CF killed [[Reg87]]:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: [[Reg120:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[Reg69]]:vgpr_32, %subreg.sub0, undef [[Reg121:%[0-9]+]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg122:%[0-9]+]]:vreg_64, $sgpr_null = V_MAD_U64_U32_e64 [[Reg69]]:vgpr_32, killed [[Reg112]]:vgpr_32, killed [[Reg120]]:vreg_64, 0, implicit $exec +; CHECK-NEXT: [[Reg123:%[0-9]+]]:vgpr_32 = COPY killed [[Reg122]].sub0:vreg_64 +; CHECK-NEXT: [[Reg124:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; EMPTY: +; CHECK: bb.12.loop4: +; CHECK-NEXT: ; predecessors: %bb.11, %bb.12 +; CHECK-NEXT: successors: %bb.13(0x04000000), %bb.12(0x7c000000); %bb.13(3.12%), %bb.12(96.88%) +; EMPTY: +; CHECK: [[Reg125:%[0-9]+]]:sreg_32 = PHI [[Reg124]]:sreg_32, %bb.11, [[Reg126:%[0-9]+]]:sreg_32, %bb.12 +; CHECK-NEXT: [[Reg127:%[0-9]+]]:vgpr_32 = PHI [[Reg123]]:vgpr_32, %bb.11, [[Reg128:%[0-9]+]]:vgpr_32, %bb.12 +; 
CHECK-NEXT: [[Reg129:%[0-9]+]]:vgpr_32 = PHI [[Reg114]]:vgpr_32, %bb.11, [[Reg130:%[0-9]+]]:vgpr_32, %bb.12 +; CHECK-NEXT: [[Reg131:%[0-9]+]]:vgpr_32 = PHI [[Reg118]]:vgpr_32, %bb.11, [[Reg68]]:vgpr_32, %bb.12 +; CHECK-NEXT: [[Reg132:%[0-9]+]]:vgpr_32 = PHI [[Reg113]]:vgpr_32, %bb.11, [[Reg133:%[0-9]+]]:vgpr_32, %bb.12 +; CHECK-NEXT: [[Reg134:%[0-9]+]]:vgpr_32 = PHI [[Reg117]]:vgpr_32, %bb.11, [[Reg135:%[0-9]+]]:vgpr_32, %bb.12 +; CHECK-NEXT: [[Reg136:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[Reg131]]:vgpr_32, [[Reg69]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg137:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg134]]:vgpr_32, [[Reg113]]:vgpr_32, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_DWORD [[Reg50]]:vreg_64, killed [[Reg137]]:vgpr_32, 0, 0, implicit $exec :: (store (s32) into %ir.p3, addrspace 1) +; CHECK-NEXT: [[Reg138:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[Reg131]]:vgpr_32, [[Reg127]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg139:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[Reg92]]:vgpr_32, killed [[Reg138]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg140:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg132]]:vgpr_32, [[Reg117]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg141:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[Reg140]]:vgpr_32, [[Reg139]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_DWORD [[Reg49]]:vreg_64, killed [[Reg141]]:vgpr_32, 0, 0, implicit $exec :: (store (s32) into %ir.p4, addrspace 1) +; CHECK-NEXT: [[Reg130]]:vgpr_32 = V_ADD_U32_e64 4, killed [[Reg129]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg142:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 -1431655765, killed [[Reg139]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg133]]:vgpr_32 = V_LSHRREV_B32_e64 1, killed [[Reg142]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg143:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 31, [[Reg136]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg144:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[Reg136]]:vgpr_32, killed [[Reg143]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: 
[[Reg135]]:vgpr_32 = V_ASHRREV_I32_e64 1, killed [[Reg144]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg128]]:vgpr_32 = V_ADD_U32_e64 4, killed [[Reg127]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg145:%[0-9]+]]:sreg_32 = V_CMP_GE_U32_e64 [[Reg130]]:vgpr_32, [[Reg24]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg126]]:sreg_32 = SI_IF_BREAK killed [[Reg145]]:sreg_32, killed [[Reg125]]:sreg_32, implicit-def dead $scc +; CHECK-NEXT: SI_LOOP [[Reg126]]:sreg_32, %bb.12, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.13 +; EMPTY: +; CHECK: bb.13.loop1.latch: +; CHECK-NEXT: ; predecessors: %bb.12 +; CHECK-NEXT: successors: %bb.14(0x04000000), %bb.5(0x7c000000); %bb.14(3.12%), %bb.5(96.88%) +; EMPTY: +; CHECK: SI_END_CF killed [[Reg126]]:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: [[Reg146:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[Reg140]]:vgpr_32, [[Reg27]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_DWORD [[Reg48]]:vreg_64, [[Reg146]]:vgpr_32, 0, 0, implicit $exec :: (store (s32) into %ir.p5, addrspace 1) +; CHECK-NEXT: [[Reg67]]:vgpr_32 = V_ADD_U32_e64 1, killed [[Reg66]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg65]]:vgpr_32 = V_ADD_U32_e64 1, killed [[Reg64]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg147:%[0-9]+]]:sreg_32 = V_CMP_GE_U32_e64 [[Reg67]]:vgpr_32, [[Reg21]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg63]]:sreg_32 = SI_IF_BREAK killed [[Reg147]]:sreg_32, killed [[Reg62]]:sreg_32, implicit-def dead $scc +; CHECK-NEXT: SI_LOOP [[Reg63]]:sreg_32, %bb.5, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.14 +; EMPTY: +; CHECK: bb.14.bb6: +; CHECK-NEXT: ; predecessors: %bb.13 +; CHECK-NEXT: successors: %bb.15(0x40000000), %bb.16(0x40000000); %bb.15(50.00%), %bb.16(50.00%) +; EMPTY: +; CHECK: SI_END_CF killed [[Reg63]]:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: 
[[Reg148:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 100, killed [[Reg58]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg149:%[0-9]+]]:sreg_32 = V_CMP_GT_U32_e64 [[Reg148]]:vgpr_32, [[Reg146]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg150:%[0-9]+]]:sreg_32 = SI_IF killed [[Reg149]]:sreg_32, %bb.16, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.15 +; EMPTY: +; CHECK: bb.15.bb7: +; CHECK-NEXT: ; predecessors: %bb.14 +; CHECK-NEXT: successors: %bb.16(0x80000000); %bb.16(100.00%) +; EMPTY: +; CHECK: GLOBAL_STORE_DWORD killed [[Reg47]]:vreg_64, [[Reg148]]:vgpr_32, 0, 0, implicit $exec :: (store (s32) into %ir.p6, addrspace 1) +; EMPTY: +; CHECK: bb.16.loop5.preheader: +; CHECK-NEXT: ; predecessors: %bb.14, %bb.15 +; CHECK-NEXT: successors: %bb.17(0x80000000); %bb.17(100.00%) +; EMPTY: +; CHECK: [[Reg151:%[0-9]+]]:vgpr_32 = PHI [[Reg148]]:vgpr_32, %bb.14, [[Reg146]]:vgpr_32, %bb.15 +; CHECK-NEXT: SI_END_CF killed [[Reg150]]:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: [[Reg152:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; EMPTY: +; CHECK: bb.17.loop5: +; CHECK-NEXT: ; predecessors: %bb.16, %bb.17 +; CHECK-NEXT: successors: %bb.18(0x04000000), %bb.17(0x7c000000); %bb.18(3.12%), %bb.17(96.88%) +; EMPTY: +; CHECK: [[Reg153:%[0-9]+]]:sreg_32 = PHI [[Reg152]]:sreg_32, %bb.16, [[Reg154:%[0-9]+]]:sreg_32, %bb.17 +; CHECK-NEXT: [[Reg155:%[0-9]+]]:vgpr_32 = PHI [[Reg151]]:vgpr_32, %bb.16, [[Reg156:%[0-9]+]]:vgpr_32, %bb.17 +; CHECK-NEXT: [[Reg156]]:vgpr_32 = V_ADD_U32_e64 2, [[Reg155]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg157:%[0-9]+]]:sreg_32 = V_CMP_GE_U32_e64 [[Reg156]]:vgpr_32, [[Reg25]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg154]]:sreg_32 = SI_IF_BREAK killed [[Reg157]]:sreg_32, killed [[Reg153]]:sreg_32, implicit-def dead $scc +; CHECK-NEXT: SI_LOOP [[Reg154]]:sreg_32, %bb.17, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.18 +; EMPTY: +; CHECK: 
bb.18.exit: +; CHECK-NEXT: ; predecessors: %bb.17 +; EMPTY: +; CHECK: SI_END_CF killed [[Reg154]]:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: [[Reg158:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 100, killed [[Reg38]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg159:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg148]]:vgpr_32, killed [[Reg155]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg160:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg158]]:vgpr_32, killed [[Reg159]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg161:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[Reg160]]:vgpr_32, implicit $exec +; CHECK-NEXT: $sgpr0 = COPY killed [[Reg161]]:sreg_32_xm0 +; CHECK-NEXT: SI_RETURN_TO_EPILOG killed $sgpr0 +; EMPTY: +; CHECK: # End machine code for function test. +; EMPTY: +; CHECK: Next-use distance of Register [[Reg28]] = 29.0 +; CHECK-NEXT: Next-use distance of Register [[Reg27]] = 27040088.0 +; CHECK-NEXT: Next-use distance of Register [[Reg26]] = 38.0 +; CHECK-NEXT: Next-use distance of Register [[Reg25]] = 27040035070.0 +; CHECK-NEXT: Next-use distance of Register [[Reg24]] = 27015105.0 +; CHECK-NEXT: Next-use distance of Register [[Reg23]] = 106.0 +; CHECK-NEXT: Next-use distance of Register [[Reg22]] = 27088.0 +; CHECK-NEXT: Next-use distance of Register [[Reg21]] = 27040086.0 +; CHECK-NEXT: Next-use distance of Register [[Reg20]] = 35.0 +; CHECK-NEXT: Next-use distance of Register [[Reg19]] = 34.0 +; CHECK-NEXT: Next-use distance of Register [[Reg18]] = 34.0 +; CHECK-NEXT: Next-use distance of Register [[Reg17]] = 33.0 +; CHECK-NEXT: Next-use distance of Register [[Reg16]] = 33.0 +; CHECK-NEXT: Next-use distance of Register [[Reg15]] = 32.0 +; CHECK-NEXT: Next-use distance of Register [[Reg14]] = 32.0 +; CHECK-NEXT: Next-use distance of Register [[Reg13]] = 31.0 +; CHECK-NEXT: Next-use distance of Register [[Reg12]] = 31.0 +; CHECK-NEXT: Next-use distance of Register [[Reg11]] = 30.0 +; CHECK-NEXT: Next-use distance 
of Register [[Reg10]] = 30.0 +; CHECK-NEXT: Next-use distance of Register [[Reg9]] = 29.0 +; CHECK-NEXT: Next-use distance of Register [[Reg8]] = 29.0 +; CHECK-NEXT: Next-use distance of Register [[Reg7]] = 28.0 +; CHECK-NEXT: Next-use distance of Register [[Reg6]] = 28.0 +; CHECK-NEXT: Next-use distance of Register [[Reg5]] = 27.0 +; CHECK-NEXT: Next-use distance of Register [[Reg4]] = 27.0 +; CHECK-NEXT: Next-use distance of Register [[Reg3]] = 26.0 +; CHECK-NEXT: Next-use distance of Register [[Reg2]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg1]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg29]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg30]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg31]] = 8.0 +; CHECK-NEXT: Next-use distance of Register [[Reg32]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg33]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg34]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg35]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg36]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg37]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg38]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg39]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg40]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg41]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg42]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg43]] = 27047.0 +; CHECK-NEXT: Next-use distance of Register [[Reg44]] = 62.0 +; CHECK-NEXT: Next-use distance of Register [[Reg45]] = 60.0 +; CHECK-NEXT: Next-use distance of Register [[Reg46]] = 58.0 +; CHECK-NEXT: Next-use distance of Register [[Reg47]] = 27040035020.0 +; CHECK-NEXT: Next-use distance of Register [[Reg48]] = 27040042.0 +; CHECK-NEXT: Next-use distance of Register [[Reg49]] = 27015052.0 +; CHECK-NEXT: Next-use distance of Register [[Reg50]] = 27015046.0 +; CHECK-NEXT: Next-use distance of 
Register [[Reg51]] = 40.0 +; CHECK-NEXT: Next-use distance of Register [[Reg52]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg53]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg54]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg55]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg56]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg57]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg58]] = 27040035005.0 +; CHECK-NEXT: Next-use distance of Register [[Reg59]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg60]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg61]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg62]] = 27040032.0 +; CHECK-NEXT: Next-use distance of Register [[Reg64]] = 19.0 +; CHECK-NEXT: Next-use distance of Register [[Reg66]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg68]] = 27015026.0 +; CHECK-NEXT: Next-use distance of Register [[Reg70]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg71]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg72]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg73]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg74]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg75]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg76]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg77]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg78]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg79]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg80]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg81]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg82]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg83]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg84]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg85]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg86]] = 27012.0 +; CHECK-NEXT: Next-use distance of Register 
[[Reg88]] = 17.0 +; CHECK-NEXT: Next-use distance of Register [[Reg90]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg92]] = 27015016.0 +; CHECK-NEXT: Next-use distance of Register [[Reg94]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg95]] = 24.0 +; CHECK-NEXT: Next-use distance of Register [[Reg97]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg99]] = 8.0 +; CHECK-NEXT: Next-use distance of Register [[Reg100]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg101]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg102]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg103]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg104]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg105]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg106]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg107]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg108]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg109]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg110]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg111]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg69]] = 6.0 +; CHECK-NEXT: Next-use distance of Register [[Reg112]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg93]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg113]] = 27015010.0 +; CHECK-NEXT: Next-use distance of Register [[Reg114]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg98]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg115]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg116]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg96]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg91]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg117]] = 27015011.0 +; CHECK-NEXT: Next-use distance of Register [[Reg118]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg89]] = 6.0 +; CHECK-NEXT: Next-use distance of Register 
[[Reg119]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg87]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg120]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg122]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg123]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg124]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg125]] = 22.0 +; CHECK-NEXT: Next-use distance of Register [[Reg127]] = 8.0 +; CHECK-NEXT: Next-use distance of Register [[Reg129]] = 12.0 +; CHECK-NEXT: Next-use distance of Register [[Reg131]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg132]] = 7.0 +; CHECK-NEXT: Next-use distance of Register [[Reg134]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg136]] = 11.0 +; CHECK-NEXT: Next-use distance of Register [[Reg137]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg138]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg139]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg140]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg141]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg130]] = 7.0 +; CHECK-NEXT: Next-use distance of Register [[Reg142]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg133]] = 13.0 +; CHECK-NEXT: Next-use distance of Register [[Reg143]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg144]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg135]] = 11.0 +; CHECK-NEXT: Next-use distance of Register [[Reg128]] = 6.0 +; CHECK-NEXT: Next-use distance of Register [[Reg145]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg126]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg146]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg67]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg65]] = 6.0 +; CHECK-NEXT: Next-use distance of Register [[Reg147]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg63]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg148]] = 
1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg149]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg150]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg151]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg152]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg153]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg155]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg156]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg157]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg154]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg158]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg159]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg160]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg161]] = 1.0 +entry: +; entry +; | +; bb1 +; / \ +; bb2 bb3 +; \ / +; bb4 +; | +; loop1.header<-------+ +; | | +; loop2.header<-----+ | +; | | | +; loop3.header<---+ | | +; / | | | | +; bb5 | | | | +; \ | | | | +; loop3.latch-----+ | | +; | | | +; loop2.latch-------+ | +; | | +; +-->loop4| | +; +--------+ | +; | | +; loop1.latch---------+ +; | +; bb6 +; / | +; bb7 | +; | | +; +-->loop5 +; +-----+ +; | +; exit + + %ld1 = load i32, ptr addrspace(1) %p1, align 1 + %add1 = add i32 %ld1, 100 + br label %bb1 + +bb1: + br i1 %cond1, label %bb2, label %bb3 + +bb2: + %mul1 = mul i32 %Val1, %ld1 + store i32 %mul1, ptr addrspace(1) %p1, align 4 + br label %bb4 + +bb3: + %add2 = add i32 %Val1, %ld1 + store i32 %add2, ptr addrspace(1) %p1, align 2 + br label %bb4 + +bb4: + %phi1 = phi i32 [ %mul1, %bb2 ], [ %add2, %bb3 ] + %ld2 = load i32, ptr addrspace(1) %p1, align 1 + br label %loop1.header + +loop1.header: + %phi.inc1 = phi i32 [ %ld1, %bb4 ], [ %inc1, %loop1.latch ] + %phi.phi = phi i32 [ 0, %bb4 ], [ %phi2, %loop1.latch ] + %sext1 = sext i32 %phi.inc1 to i64 + %gep1 = getelementptr inbounds i64, ptr addrspace(1) %p1, i64 %sext1 + %ld3 = load i32, ptr addrspace(1) %gep1, align 1 + %mul2 = 
mul i32 %Val1, %phi.inc1 + store i32 %mul2, ptr addrspace(1) %p1, align 2 + br label %loop2.header + +loop2.header: + %phi.inc2 = phi i32 [ %ld3, %loop1.header ], [ %inc2, %loop2.latch ] + %phi6 = phi i32 [ %phi.inc1, %loop1.header ], [ %phi5, %loop2.latch ] + br label %loop3.header + +loop3.header: + %phi.inc3 = phi i32 [ %phi.inc2, %loop2.header ], [ %inc3, %loop3.latch ] + %ld4 = load i32, ptr addrspace(1) %p2, align 1 + %cond2 = icmp uge i32 %phi.inc3, %ld4 + br i1 %cond2, label %bb5, label %loop3.latch + +bb5: + %mul3 = mul i32 %phi.inc1, %phi.inc2 + %add3 = add i32 %mul3, %phi.inc3 + %mul4 = mul i32 %add3, %ld4 + %add4 = add i32 %mul4, %mul2 + store i32 %add4, ptr addrspace(1) %p7 + br label %loop3.latch + +loop3.latch: + %phi2 = phi i32 [ %add4, %bb5 ], [ %ld3, %loop3.header ] + %phi4 = phi i32 [ %mul4, %bb5 ], [ %phi.inc3, %loop3.header ] + %phi5 = phi i32 [ %phi.inc2, %bb5 ], [ %phi.inc3, %loop3.header ] + store i32 %phi5, ptr addrspace(1) %p9 + %inc3 = add i32 %phi.inc3, 1 + %ld10 = load i32, ptr addrspace(1) %p10 + %mul11 = mul i32 %phi4, %phi2 + store i32 %mul11, ptr addrspace(1) %p10 + %cond3 = icmp ult i32 %inc3, %TC3 + br i1 %cond3, label %loop3.header, label %loop2.latch + +loop2.latch: + %inc2 = add i32 %phi.inc2, 1 + %ld11 = load i32, ptr addrspace(1) %p11 + %add9 = add i32 %inc2, %phi4 + store i32 %add9, ptr addrspace(1) %p11 + %cond4 = icmp ult i32 %inc2, %TC2 + br i1 %cond4, label %loop2.header, label %loop4 + +loop4: + %phi.inc4 = phi i32 [ %mul11, %loop2.latch ], [ %inc4, %loop4 ] + %phi7 = phi i32 [ %add9, %loop2.latch ], [ %phi.phi, %loop4 ] + %phi.div1 = phi i32 [ %ld10, %loop2.latch ], [ %div1, %loop4 ] + %phi.div2 = phi i32 [ %ld11, %loop2.latch ], [ %div2, %loop4 ] + %add5 = add i32 %phi7, %phi2 + %mul5 = mul i32 %phi.div2, %ld10 + store i32 %mul5, ptr addrspace(1) %p3 + %add6 = add i32 %add5, %phi.inc4 + %mul8 = mul i32 %phi6, %add6 + %mul9 = mul i32 %phi.div1, %ld11 + %add10 = add i32 %mul9, %mul8 + store i32 %add10, ptr addrspace(1) 
%p4 + %inc4 = add i32 %phi.inc4, 4 + %div1 = udiv i32 %mul8, 3 + %div2 = sdiv i32 %add5, 2 + %cond7 = icmp ult i32 %inc4, %TC4 + br i1 %cond7, label %loop4, label %loop1.latch + +loop1.latch: + %add7 = add i32 %mul9, %Val2 + store i32 %add7, ptr addrspace(1) %p5 + %inc1 = add i32 %phi.inc1, 1 + %cond5 = icmp ult i32 %inc1, %TC1 + br i1 %cond5, label %loop1.header, label %bb6 + +bb6: + %mul6 = mul i32 %ld2, 100 + %cond8 = icmp ugt i32 %mul6, %add7 + br i1 %cond8, label %bb7, label %loop5 + +bb7: + store i32 %mul6, ptr addrspace(1) %p6 + br label %loop5 + +loop5: + %phi.inc5 = phi i32 [ %add7, %bb7 ], [ %mul6, %bb6 ], [ %inc5, %loop5 ] + %add8 = mul i32 %mul6, %phi.inc5 + %inc5 = add i32 %phi.inc5, 2 + %cond9 = icmp ult i32 %inc5, %TC5 + br i1 %cond9, label %loop5, label %exit + +exit: + %mul7 = mul i32 %add1, %add8 + ret i32 %mul7 +} diff --git a/llvm/test/CodeGen/AMDGPU/test_ers_emit_restore_in_loop_preheader4.ll b/llvm/test/CodeGen/AMDGPU/test_ers_emit_restore_in_loop_preheader4.ll new file mode 100644 index 0000000000000..44790be8ee87c --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/test_ers_emit_restore_in_loop_preheader4.ll @@ -0,0 +1,266 @@ +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -enable-next-use-analysis=true -verify-machineinstrs -dump-next-use-distance < %s 2>&1 | FileCheck %s + +; +; bb.0.entry +; | +; +<--------+ +; bb.1.loop1 | +; +---------+ +; | +; bb.2.bb +; | +; +<--------+ +; bb.3.loop2 | +; +---------+ +; | +; bb.4.exit +; +define amdgpu_ps i32 @test(ptr addrspace(1) %p1, ptr addrspace(1) %p2, ptr addrspace(1) %p3, ptr addrspace(1) %p4, i32 %TC1, i32 %TC2) { +; CHECK-LABEL: # Machine code for function test: IsSSA, TracksLiveness +; CHECK-NEXT: Function Live Ins: $vgpr0 in [[Reg1:%[0-9]+]], $vgpr1 in [[Reg2:%[0-9]+]], $vgpr2 in [[Reg3:%[0-9]+]], $vgpr3 in [[Reg4:%[0-9]+]], $vgpr4 in [[Reg5:%[0-9]+]], $vgpr5 in [[Reg6:%[0-9]+]], $vgpr6 in [[Reg7:%[0-9]+]], $vgpr7 in [[Reg8:%[0-9]+]], $vgpr8 in [[Reg9:%[0-9]+]], $vgpr9 in [[Reg10:%[0-9]+]] +; EMPTY: +; 
CHECK: bb.0.entry: +; CHECK-NEXT: successors: %bb.1(0x80000000); %bb.1(100.00%) +; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9 +; CHECK-NEXT: [[Reg10]]:vgpr_32 = COPY killed $vgpr9 +; CHECK-NEXT: [[Reg9]]:vgpr_32 = COPY killed $vgpr8 +; CHECK-NEXT: [[Reg8]]:vgpr_32 = COPY killed $vgpr7 +; CHECK-NEXT: [[Reg7]]:vgpr_32 = COPY killed $vgpr6 +; CHECK-NEXT: [[Reg6]]:vgpr_32 = COPY killed $vgpr5 +; CHECK-NEXT: [[Reg5]]:vgpr_32 = COPY killed $vgpr4 +; CHECK-NEXT: [[Reg4]]:vgpr_32 = COPY killed $vgpr3 +; CHECK-NEXT: [[Reg3]]:vgpr_32 = COPY killed $vgpr2 +; CHECK-NEXT: [[Reg2]]:vgpr_32 = COPY killed $vgpr1 +; CHECK-NEXT: [[Reg1]]:vgpr_32 = COPY killed $vgpr0 +; CHECK-NEXT: [[Reg11:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg5]]:vgpr_32, %subreg.sub0, killed [[Reg6]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg12:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg3]]:vgpr_32, %subreg.sub0, killed [[Reg4]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg13:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg1]]:vgpr_32, %subreg.sub0, killed [[Reg2]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg14:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg7]]:vgpr_32, %subreg.sub0, killed [[Reg8]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg15:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[Reg14]]:vreg_64, 0, 0, implicit $exec :: (load (s16) from %ir.p4, addrspace 1) +; CHECK-NEXT: [[Reg16:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[Reg14]]:vreg_64, 2, 0, implicit $exec :: (load (s16) from %ir.p4 + 2, addrspace 1) +; CHECK-NEXT: [[Reg17:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg16]]:vgpr_32, 16, killed [[Reg15]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg18:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg13]]:vreg_64, 0, 0, implicit $exec :: (load (s8) from %ir.p1, addrspace 1) +; CHECK-NEXT: [[Reg19:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg13]]:vreg_64, 1, 0, implicit $exec :: (load (s8) from %ir.p1 + 1, addrspace 1) +; CHECK-NEXT: [[Reg20:%[0-9]+]]:vgpr_32 = 
V_LSHL_OR_B32_e64 killed [[Reg19]]:vgpr_32, 8, killed [[Reg18]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg21:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg13]]:vreg_64, 2, 0, implicit $exec :: (load (s8) from %ir.p1 + 2, addrspace 1) +; CHECK-NEXT: [[Reg22:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg13]]:vreg_64, 3, 0, implicit $exec :: (load (s8) from %ir.p1 + 3, addrspace 1) +; CHECK-NEXT: [[Reg23:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg22]]:vgpr_32, 8, killed [[Reg21]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg24:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg23]]:vgpr_32, 16, killed [[Reg20]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg25:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 100, implicit $exec +; CHECK-NEXT: [[Reg26:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 100, [[Reg24]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_DWORD killed [[Reg14]]:vreg_64, [[Reg26]]:vgpr_32, 0, 0, implicit $exec :: (store (s32) into %ir.p4, addrspace 1) +; CHECK-NEXT: [[Reg27:%[0-9]+]]:sreg_32 = S_MOV_B32 1 +; CHECK-NEXT: [[Reg28:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; EMPTY: +; CHECK: bb.1.loop1: +; CHECK-NEXT: ; predecessors: %bb.0, %bb.1 +; CHECK-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000); %bb.2(3.12%), %bb.1(96.88%) +; EMPTY: +; CHECK: [[Reg29:%[0-9]+]]:sreg_32 = PHI [[Reg28]]:sreg_32, %bb.0, [[Reg30:%[0-9]+]]:sreg_32, %bb.1 +; CHECK-NEXT: [[Reg31:%[0-9]+]]:sreg_32 = PHI [[Reg28]]:sreg_32, %bb.0, [[Reg32:%[0-9]+]]:sreg_32, %bb.1 +; CHECK-NEXT: [[Reg33:%[0-9]+]]:sreg_32 = PHI [[Reg27]]:sreg_32, %bb.0, [[Reg34:%[0-9]+]]:sreg_32, %bb.1 +; CHECK-NEXT: [[Reg35:%[0-9]+]]:vgpr_32 = PHI [[Reg24]]:vgpr_32, %bb.0, [[Reg36:%[0-9]+]]:vgpr_32, %bb.1 +; CHECK-NEXT: [[Reg37:%[0-9]+]]:vgpr_32 = PHI [[Reg25]]:vgpr_32, %bb.0, [[Reg38:%[0-9]+]]:vgpr_32, %bb.1 +; CHECK-NEXT: [[Reg39:%[0-9]+]]:vgpr_32 = PHI [[Reg24]]:vgpr_32, %bb.0, [[Reg40:%[0-9]+]]:vgpr_32, %bb.1 +; CHECK-NEXT: [[Reg41:%[0-9]+]]:sreg_32 = S_ADD_I32 [[Reg33]]:sreg_32, -1, implicit-def dead $scc +; CHECK-NEXT: 
[[Reg42:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[Reg41]]:sreg_32, 31, implicit-def dead $scc +; CHECK-NEXT: [[Reg43:%[0-9]+]]:sreg_64 = REG_SEQUENCE killed [[Reg41]]:sreg_32, %subreg.sub0, killed [[Reg42]]:sreg_32_xm0, %subreg.sub1 +; CHECK-NEXT: [[Reg44:%[0-9]+]]:sreg_64 = nsw S_LSHL_B64 killed [[Reg43]]:sreg_64, 2, implicit-def dead $scc +; CHECK-NEXT: [[Reg45:%[0-9]+]]:vgpr_32, [[Reg46:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[Reg12]].sub0:vreg_64, [[Reg44]].sub0:sreg_64, 0, implicit $exec +; CHECK-NEXT: [[Reg47:%[0-9]+]]:vgpr_32, dead $sgpr_null = V_ADDC_U32_e64 [[Reg44]].sub1:sreg_64, [[Reg12]].sub1:vreg_64, killed [[Reg46]]:sreg_32_xm0_xexec, 0, implicit $exec +; CHECK-NEXT: [[Reg48:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg45]]:vgpr_32, %subreg.sub0, killed [[Reg47]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg49:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD killed [[Reg48]]:vreg_64, 0, 0, implicit $exec :: (load (s32) from %ir.gep1, addrspace 1) +; CHECK-NEXT: [[Reg36]]:vgpr_32 = V_ADD_U32_e64 [[Reg33]]:sreg_32, [[Reg49]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg38]]:vgpr_32 = V_MUL_LO_U32_e64 [[Reg49]]:vgpr_32, [[Reg33]]:sreg_32, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_SHORT_D16_HI [[Reg13]]:vreg_64, [[Reg38]]:vgpr_32, 2, 0, implicit $exec :: (store (s16) into %ir.p1 + 2, addrspace 1) +; CHECK-NEXT: GLOBAL_STORE_SHORT [[Reg13]]:vreg_64, [[Reg38]]:vgpr_32, 0, 0, implicit $exec :: (store (s16) into %ir.p1, addrspace 1) +; CHECK-NEXT: [[Reg50:%[0-9]+]]:vgpr_32, [[Reg51:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[Reg11]].sub0:vreg_64, [[Reg44]].sub0:sreg_64, 0, implicit $exec +; CHECK-NEXT: [[Reg52:%[0-9]+]]:vgpr_32, dead $sgpr_null = V_ADDC_U32_e64 killed [[Reg44]].sub1:sreg_64, [[Reg11]].sub1:vreg_64, killed [[Reg51]]:sreg_32_xm0_xexec, 0, implicit $exec +; CHECK-NEXT: [[Reg53:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg50]]:vgpr_32, %subreg.sub0, killed [[Reg52]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg54:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT 
[[Reg53]]:vreg_64, 0, 0, implicit $exec :: (load (s16) from %ir.gep2, addrspace 1) +; CHECK-NEXT: [[Reg55:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT killed [[Reg53]]:vreg_64, 2, 0, implicit $exec :: (load (s16) from %ir.gep2 + 2, addrspace 1) +; CHECK-NEXT: [[Reg56:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg55]]:vgpr_32, 16, killed [[Reg54]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg34]]:sreg_32 = S_ADD_I32 [[Reg33]]:sreg_32, 1, implicit-def dead $scc +; CHECK-NEXT: [[Reg32]]:sreg_32 = S_ADD_I32 [[Reg31]]:sreg_32, -1, implicit-def dead $scc +; CHECK-NEXT: [[Reg40]]:vgpr_32 = V_ADD_U32_e64 killed [[Reg31]]:sreg_32, killed [[Reg56]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg57:%[0-9]+]]:sreg_32 = V_CMP_GE_U32_e64 killed [[Reg33]]:sreg_32, [[Reg9]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg30]]:sreg_32 = SI_IF_BREAK killed [[Reg57]]:sreg_32, killed [[Reg29]]:sreg_32, implicit-def dead $scc +; CHECK-NEXT: [[Reg58:%[0-9]+]]:vgpr_32 = COPY [[Reg34]]:sreg_32, implicit $exec +; CHECK-NEXT: SI_LOOP [[Reg30]]:sreg_32, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.2 +; EMPTY: +; CHECK: bb.2.bb: +; CHECK-NEXT: ; predecessors: %bb.1 +; CHECK-NEXT: successors: %bb.3(0x80000000); %bb.3(100.00%) +; EMPTY: +; CHECK: SI_END_CF killed [[Reg30]]:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: [[Reg59:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 -2, killed [[Reg58]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg60:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg38]]:vgpr_32, killed [[Reg59]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg61:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 100, killed [[Reg35]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg62:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[Reg61]]:vgpr_32, [[Reg17]]:vgpr_32, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_DWORD [[Reg11]]:vreg_64, killed [[Reg62]]:vgpr_32, 0, 0, implicit $exec :: (store (s32) into %ir.p3, addrspace 1) +; CHECK-NEXT: 
[[Reg63:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; EMPTY: +; CHECK: bb.3.loop2: +; CHECK-NEXT: ; predecessors: %bb.2, %bb.3 +; CHECK-NEXT: successors: %bb.4(0x04000000), %bb.3(0x7c000000); %bb.4(3.12%), %bb.3(96.88%) +; EMPTY: +; CHECK: [[Reg64:%[0-9]+]]:sreg_32 = PHI [[Reg63]]:sreg_32, %bb.2, [[Reg65:%[0-9]+]]:sreg_32, %bb.3 +; CHECK-NEXT: [[Reg66:%[0-9]+]]:vgpr_32 = PHI [[Reg17]]:vgpr_32, %bb.2, [[Reg67:%[0-9]+]]:vgpr_32, %bb.3 +; CHECK-NEXT: [[Reg68:%[0-9]+]]:vgpr_32 = PHI [[Reg39]]:vgpr_32, %bb.2, [[Reg60]]:vgpr_32, %bb.3 +; CHECK-NEXT: [[Reg67]]:vgpr_32 = V_ADD_U32_e64 2, killed [[Reg66]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg69:%[0-9]+]]:sreg_32 = V_CMP_GE_U32_e64 [[Reg67]]:vgpr_32, [[Reg10]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg65]]:sreg_32 = SI_IF_BREAK killed [[Reg69]]:sreg_32, killed [[Reg64]]:sreg_32, implicit-def dead $scc +; CHECK-NEXT: SI_LOOP [[Reg65]]:sreg_32, %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.4 +; EMPTY: +; CHECK: bb.4.exit: +; CHECK-NEXT: ; predecessors: %bb.3 +; EMPTY: +; CHECK: SI_END_CF killed [[Reg65]]:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_DWORD killed [[Reg11]]:vreg_64, [[Reg68]]:vgpr_32, 0, 0, implicit $exec :: (store (s32) into %ir.p3, addrspace 1) +; CHECK-NEXT: [[Reg70:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[Reg68]]:vgpr_32, killed [[Reg67]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg71:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 killed [[Reg70]]:vgpr_32, killed [[Reg37]]:vgpr_32, killed [[Reg61]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg72:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 killed [[Reg71]]:vgpr_32, killed [[Reg49]]:vgpr_32, killed [[Reg26]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg73:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[Reg72]]:vgpr_32, implicit $exec +; CHECK-NEXT: $sgpr0 = COPY killed [[Reg73]]:sreg_32_xm0 +; CHECK-NEXT: SI_RETURN_TO_EPILOG killed $sgpr0 +; EMPTY: +; CHECK: # End 
machine code for function test. +; EMPTY: +; CHECK: Next-use distance of Register [[Reg10]] = 32040.0 +; CHECK-NEXT: Next-use distance of Register [[Reg9]] = 55.0 +; CHECK-NEXT: Next-use distance of Register [[Reg8]] = 11.0 +; CHECK-NEXT: Next-use distance of Register [[Reg7]] = 10.0 +; CHECK-NEXT: Next-use distance of Register [[Reg6]] = 6.0 +; CHECK-NEXT: Next-use distance of Register [[Reg5]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg4]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg3]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg2]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg1]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg11]] = 37.0 +; CHECK-NEXT: Next-use distance of Register [[Reg12]] = 28.0 +; CHECK-NEXT: Next-use distance of Register [[Reg13]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg14]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg15]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg16]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg17]] = 32017.0 +; CHECK-NEXT: Next-use distance of Register [[Reg18]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg19]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg20]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg21]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg22]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg23]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg24]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg25]] = 9.0 +; CHECK-NEXT: Next-use distance of Register [[Reg26]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg27]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg28]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg29]] = 28.0 +; CHECK-NEXT: Next-use distance of Register [[Reg31]] = 24.0 +; CHECK-NEXT: Next-use distance of Register [[Reg33]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg35]] = 32004.0 
+; CHECK-NEXT: Next-use distance of Register [[Reg37]] = 40011.0 +; CHECK-NEXT: Next-use distance of Register [[Reg39]] = 32010.0 +; CHECK-NEXT: Next-use distance of Register [[Reg41]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg42]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg43]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg44]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg45]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg46]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg47]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg48]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg49]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg36]] = 21.0 +; CHECK-NEXT: Next-use distance of Register [[Reg38]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg50]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg51]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg52]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg53]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg54]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg55]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg56]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg34]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg32]] = 8.0 +; CHECK-NEXT: Next-use distance of Register [[Reg40]] = 11.0 +; CHECK-NEXT: Next-use distance of Register [[Reg57]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg30]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg58]] = 32002.0 +; CHECK-NEXT: Next-use distance of Register [[Reg59]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg60]] = 7.0 +; CHECK-NEXT: Next-use distance of Register [[Reg61]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg62]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg63]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg64]] = 5.0 +; CHECK-NEXT: Next-use distance 
of Register [[Reg66]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg68]] = 8002.0 +; CHECK-NEXT: Next-use distance of Register [[Reg67]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg69]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg65]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg70]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg71]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg72]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg73]] = 1.0 +entry: +; entry +; | +; +<-----+ +; loop1 | +; +------+ +; | +; bb +; | +; +<-----+ +; loop2 | +; +------+ +; | +; exit + %ld0 = load i32, ptr addrspace(1) %p4, align 2 + %ld1 = load i32, ptr addrspace(1) %p1, align 1 + %add1 = add i32 %ld1, 100 + store i32 %add1, ptr addrspace(1) %p4, align 4 + br label %loop1 + +loop1: + %phi.inc1 = phi i32 [ 0, %entry ], [ %inc1, %loop1 ] + %phi1 = phi i32 [ %ld1, %entry ], [ %add2, %loop1 ] + %phi2 = phi i32 [ 100, %entry ], [ %mul1, %loop1 ] + %phi3 = phi i32 [ %ld1, %entry ], [ %sub, %loop1 ] + %sext1 = sext i32 %phi.inc1 to i64 + %gep1 = getelementptr inbounds i32, ptr addrspace(1) %p2, i64 %sext1 + %ld2 = load i32, ptr addrspace(1) %gep1, align 4 + %inc1 = add i32 %phi.inc1, 1 + %add2 = add i32 %ld2, %inc1 + %mul1 = mul i32 %ld2, %inc1 + store i32 %mul1, ptr addrspace(1) %p1, align 2 + %mul2 = mul i32 %mul1, %phi.inc1 + %sext2 = sext i32 %inc1 to i64 + %gep2 = getelementptr inbounds i32, ptr addrspace(1) %p3, i64 %sext1 + %ld3 = load i32, ptr addrspace(1) %gep2, align 2 + %sub = sub i32 %ld3, %phi.inc1 + %cond1 = icmp ult i32 %inc1, %TC1 + br i1 %cond1, label %loop1, label %bb + +bb: + %mul3 = mul i32 %phi1, 100 + %mul4 = mul i32 %mul3, %ld0 + store i32 %mul4, ptr addrspace(1) %p3 + br label %loop2 + +loop2: + %phi.inc2 = phi i32 [ %ld0, %bb ], [ %inc2, %loop2 ] + %phi4 = phi i32 [ %phi3, %bb ], [ %mul2, %loop2 ] + %inc2 = add i32 %phi.inc2, 2 + store i32 %phi4, ptr addrspace(1) %p3 + %add3 = add i32 %phi4, %inc2 
+ %cond2 = icmp ult i32 %inc2, %TC2 + br i1 %cond2, label %loop2, label %exit + +exit: + %add4 = add i32 %add3, %phi2 + %add5 = add i32 %add4, %mul3 + %add6 = add i32 %add5, %ld2 + %add7 = add i32 %add6, %add1 + ret i32 %add7 +} + diff --git a/llvm/test/CodeGen/AMDGPU/test_ers_keep_spilled_reg_live.ll b/llvm/test/CodeGen/AMDGPU/test_ers_keep_spilled_reg_live.ll new file mode 100644 index 0000000000000..b5f9cad8f2dc5 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/test_ers_keep_spilled_reg_live.ll @@ -0,0 +1,281 @@ +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -enable-next-use-analysis=true -verify-machineinstrs -dump-next-use-distance < %s 2>&1 | FileCheck %s + +; +; bb.0.entry +; / | +; bb.3.bb2 | +; / | | +; bb.9.bb5 | | +; \ | | +; bb.1.Flow1 | +; \ | +; bb.8.Flow +; / | +; bb.2.bb1 | +; \ | +; bb.6.Flow2 +; / | +; bb.7.bb4 | +; \ | +; bb.4.Flow3 +; / | +; bb.5.bb3 | +; \ | +; bb.10.exit +; +define amdgpu_ps i64 @test(i1 %cond, ptr addrspace(3) %p, i64 %val) { +; CHECK-LABEL: # Machine code for function test: IsSSA, TracksLiveness +; CHECK-NEXT: Function Live Ins: $vgpr0 in [[Reg1:%[0-9]+]], $vgpr1 in [[Reg2:%[0-9]+]], $vgpr2 in [[Reg3:%[0-9]+]], $vgpr3 in [[Reg4:%[0-9]+]] +; EMPTY: +; CHECK: bb.0.entry: +; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.8(0x40000000); %bb.3(50.00%), %bb.8(50.00%) +; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 +; CHECK-NEXT: [[Reg4]]:vgpr_32 = COPY killed $vgpr3 +; CHECK-NEXT: [[Reg3]]:vgpr_32 = COPY killed $vgpr2 +; CHECK-NEXT: [[Reg2]]:vgpr_32 = COPY killed $vgpr1 +; CHECK-NEXT: [[Reg1]]:vgpr_32 = COPY killed $vgpr0 +; CHECK-NEXT: [[Reg5:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 1, killed [[Reg1]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg6:%[0-9]+]]:sreg_32 = V_CMP_NE_U32_e64 1, killed [[Reg5]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg7:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; CHECK-NEXT: [[Reg8:%[0-9]+]]:sreg_32 = SI_IF [[Reg6]]:sreg_32, %bb.8, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH 
%bb.3 +; EMPTY: +; CHECK: bb.1.Flow1: +; CHECK-NEXT: ; predecessors: %bb.3, %bb.9 +; CHECK-NEXT: successors: %bb.8(0x80000000); %bb.8(100.00%) +; EMPTY: +; CHECK: [[Reg9:%[0-9]+]]:sreg_32 = PHI [[Reg10:%[0-9]+]]:sreg_32, %bb.3, [[Reg11:%[0-9]+]]:sreg_32, %bb.9 +; CHECK-NEXT: [[Reg12:%[0-9]+]]:vreg_64 = PHI undef [[Reg13:%[0-9]+]]:vreg_64, %bb.3, [[Reg14:%[0-9]+]]:vreg_64, %bb.9 +; CHECK-NEXT: SI_END_CF killed [[Reg15:%[0-9]+]]:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: [[Reg16:%[0-9]+]]:sreg_32 = S_AND_B32 killed [[Reg9]]:sreg_32, $exec_lo, implicit-def dead $scc +; CHECK-NEXT: [[Reg17:%[0-9]+]]:sreg_32 = COPY killed [[Reg16]]:sreg_32 +; CHECK-NEXT: S_BRANCH %bb.8 +; EMPTY: +; CHECK: bb.2.bb1: +; CHECK-NEXT: ; predecessors: %bb.8 +; CHECK-NEXT: successors: %bb.6(0x80000000); %bb.6(100.00%) +; EMPTY: +; CHECK: [[Reg18:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[Reg2]]:vgpr_32, 0, 0, implicit $exec :: (load (s16) from %ir.p, addrspace 3) +; CHECK-NEXT: [[Reg19:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[Reg2]]:vgpr_32, 2, 0, implicit $exec :: (load (s16) from %ir.p + 2, addrspace 3) +; CHECK-NEXT: [[Reg20:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[Reg2]]:vgpr_32, 4, 0, implicit $exec :: (load (s16) from %ir.p + 4, addrspace 3) +; CHECK-NEXT: [[Reg21:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[Reg2]]:vgpr_32, 6, 0, implicit $exec :: (load (s16) from %ir.p + 6, addrspace 3) +; CHECK-NEXT: [[Reg22:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg21]]:vgpr_32, 16, killed [[Reg20]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg23:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg19]]:vgpr_32, 16, killed [[Reg18]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg24:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg23]]:vgpr_32, %subreg.sub0, killed [[Reg22]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg25:%[0-9]+]]:vreg_64 = COPY killed [[Reg24]]:vreg_64 +; CHECK-NEXT: [[Reg26:%[0-9]+]]:sreg_32 = COPY $exec_lo +; CHECK-NEXT: [[Reg27:%[0-9]+]]:sreg_32 = 
S_ANDN2_B32 killed [[Reg28:%[0-9]+]]:sreg_32, $exec_lo, implicit-def dead $scc +; CHECK-NEXT: [[Reg29:%[0-9]+]]:sreg_32 = S_AND_B32 killed [[Reg6]]:sreg_32, $exec_lo, implicit-def dead $scc +; CHECK-NEXT: [[Reg30:%[0-9]+]]:sreg_32 = S_OR_B32 killed [[Reg27]]:sreg_32, killed [[Reg29]]:sreg_32, implicit-def dead $scc +; CHECK-NEXT: S_BRANCH %bb.6 +; EMPTY: +; CHECK: bb.3.bb2: +; CHECK-NEXT: ; predecessors: %bb.0 +; CHECK-NEXT: successors: %bb.9(0x40000000), %bb.1(0x40000000); %bb.9(50.00%), %bb.1(50.00%) +; EMPTY: +; CHECK: [[Reg31:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 [[Reg2]]:vgpr_32, 8, 0, implicit $exec :: (load (s64) from %ir.gep2, addrspace 3) +; CHECK-NEXT: [[Reg10]]:sreg_32 = S_MOV_B32 -1 +; CHECK-NEXT: [[Reg15]]:sreg_32 = SI_IF [[Reg6]]:sreg_32, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.9 +; EMPTY: +; CHECK: bb.4.Flow3: +; CHECK-NEXT: ; predecessors: %bb.6, %bb.7 +; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.10(0x40000000); %bb.5(50.00%), %bb.10(50.00%) +; EMPTY: +; CHECK: [[Reg32:%[0-9]+]]:sreg_32 = PHI [[Reg33:%[0-9]+]]:sreg_32, %bb.6, [[Reg34:%[0-9]+]]:sreg_32, %bb.7 +; CHECK-NEXT: [[Reg35:%[0-9]+]]:vreg_64 = PHI [[Reg36:%[0-9]+]]:vreg_64, %bb.6, [[Reg37:%[0-9]+]]:vreg_64, %bb.7 +; CHECK-NEXT: SI_END_CF killed [[Reg38:%[0-9]+]]:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: [[Reg39:%[0-9]+]]:sreg_32 = SI_IF killed [[Reg32]]:sreg_32, %bb.10, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.5 +; EMPTY: +; CHECK: bb.5.bb3: +; CHECK-NEXT: ; predecessors: %bb.4 +; CHECK-NEXT: successors: %bb.10(0x80000000); %bb.10(100.00%) +; EMPTY: +; CHECK: [[Reg40:%[0-9]+]]:vreg_64 = DS_READ2_B32_gfx9 killed [[Reg2]]:vgpr_32, 6, 7, 0, implicit $exec :: (load (s64) from %ir.gep3, align 4, addrspace 3) +; CHECK-NEXT: [[Reg41:%[0-9]+]]:vgpr_32, [[Reg42:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[Reg40]].sub0:vreg_64, killed 
[[Reg3]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg43:%[0-9]+]]:vgpr_32, dead $sgpr_null = V_ADDC_U32_e64 killed [[Reg40]].sub1:vreg_64, killed [[Reg4]]:vgpr_32, killed [[Reg42]]:sreg_32_xm0_xexec, 0, implicit $exec +; CHECK-NEXT: [[Reg44:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg41]]:vgpr_32, %subreg.sub0, killed [[Reg43]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: S_BRANCH %bb.10 +; EMPTY: +; CHECK: bb.6.Flow2: +; CHECK-NEXT: ; predecessors: %bb.8, %bb.2 +; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.4(0x40000000); %bb.7(50.00%), %bb.4(50.00%) +; EMPTY: +; CHECK: [[Reg45:%[0-9]+]]:sreg_32 = PHI [[Reg28]]:sreg_32, %bb.8, [[Reg30]]:sreg_32, %bb.2 +; CHECK-NEXT: [[Reg33]]:sreg_32 = PHI [[Reg7]]:sreg_32, %bb.8, [[Reg26]]:sreg_32, %bb.2 +; CHECK-NEXT: [[Reg46:%[0-9]+]]:vreg_64 = PHI [[Reg47:%[0-9]+]]:vreg_64, %bb.8, [[Reg25]]:vreg_64, %bb.2 +; CHECK-NEXT: SI_END_CF killed [[Reg48:%[0-9]+]]:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: [[Reg38]]:sreg_32 = SI_IF killed [[Reg45]]:sreg_32, %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.7 +; EMPTY: +; CHECK: bb.7.bb4: +; CHECK-NEXT: ; predecessors: %bb.6 +; CHECK-NEXT: successors: %bb.4(0x80000000); %bb.4(100.00%) +; EMPTY: +; CHECK: [[Reg49:%[0-9]+]]:vgpr_32, [[Reg50:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[Reg46]].sub0:vreg_64, [[Reg3]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg51:%[0-9]+]]:vgpr_32, dead $sgpr_null = V_ADDC_U32_e64 killed [[Reg46]].sub1:vreg_64, [[Reg4]]:vgpr_32, killed [[Reg50]]:sreg_32_xm0_xexec, 0, implicit $exec +; CHECK-NEXT: [[Reg37]]:vreg_64 = REG_SEQUENCE killed [[Reg49]]:vgpr_32, %subreg.sub0, killed [[Reg51]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg52:%[0-9]+]]:sreg_32 = S_ANDN2_B32 killed [[Reg33]]:sreg_32, $exec_lo, implicit-def dead $scc +; CHECK-NEXT: [[Reg34]]:sreg_32 = COPY killed [[Reg52]]:sreg_32 +; CHECK-NEXT: S_BRANCH %bb.4 +; EMPTY: +; CHECK: bb.8.Flow: +; CHECK-NEXT: ; 
predecessors: %bb.0, %bb.1 +; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.6(0x40000000); %bb.2(50.00%), %bb.6(50.00%) +; EMPTY: +; CHECK: [[Reg28]]:sreg_32 = PHI [[Reg7]]:sreg_32, %bb.0, [[Reg17]]:sreg_32, %bb.1 +; CHECK-NEXT: [[Reg36]]:vreg_64 = PHI undef [[Reg53:%[0-9]+]]:vreg_64, %bb.0, [[Reg12]]:vreg_64, %bb.1 +; CHECK-NEXT: [[Reg47]]:vreg_64 = PHI undef [[Reg53]]:vreg_64, %bb.0, [[Reg31]]:vreg_64, %bb.1 +; CHECK-NEXT: [[Reg48]]:sreg_32 = SI_ELSE killed [[Reg8]]:sreg_32, %bb.6, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.2 +; EMPTY: +; CHECK: bb.9.bb5: +; CHECK-NEXT: ; predecessors: %bb.3 +; CHECK-NEXT: successors: %bb.1(0x80000000); %bb.1(100.00%) +; EMPTY: +; CHECK: [[Reg54:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[Reg2]]:vgpr_32, 32, 0, implicit $exec :: (load (s8) from %ir.gep4, addrspace 3) +; CHECK-NEXT: [[Reg55:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[Reg2]]:vgpr_32, 33, 0, implicit $exec :: (load (s8) from %ir.gep4 + 1, addrspace 3) +; CHECK-NEXT: [[Reg56:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[Reg2]]:vgpr_32, 34, 0, implicit $exec :: (load (s8) from %ir.gep4 + 2, addrspace 3) +; CHECK-NEXT: [[Reg57:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[Reg2]]:vgpr_32, 35, 0, implicit $exec :: (load (s8) from %ir.gep4 + 3, addrspace 3) +; CHECK-NEXT: [[Reg58:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[Reg2]]:vgpr_32, 36, 0, implicit $exec :: (load (s8) from %ir.gep4 + 4, addrspace 3) +; CHECK-NEXT: [[Reg59:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[Reg2]]:vgpr_32, 37, 0, implicit $exec :: (load (s8) from %ir.gep4 + 5, addrspace 3) +; CHECK-NEXT: [[Reg60:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[Reg2]]:vgpr_32, 38, 0, implicit $exec :: (load (s8) from %ir.gep4 + 6, addrspace 3) +; CHECK-NEXT: [[Reg61:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[Reg2]]:vgpr_32, 39, 0, implicit $exec :: (load (s8) from %ir.gep4 + 7, addrspace 3) +; CHECK-NEXT: [[Reg62:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg59]]:vgpr_32, 8, killed [[Reg58]]:vgpr_32, implicit 
$exec +; CHECK-NEXT: [[Reg63:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg61]]:vgpr_32, 8, killed [[Reg60]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg64:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg63]]:vgpr_32, 16, killed [[Reg62]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg65:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg55]]:vgpr_32, 8, killed [[Reg54]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg66:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg57]]:vgpr_32, 8, killed [[Reg56]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg67:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg66]]:vgpr_32, 16, killed [[Reg65]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg68:%[0-9]+]]:vgpr_32, [[Reg69:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 killed [[Reg67]]:vgpr_32, [[Reg3]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg70:%[0-9]+]]:vgpr_32, dead $sgpr_null = V_ADDC_U32_e64 killed [[Reg64]]:vgpr_32, [[Reg4]]:vgpr_32, killed [[Reg69]]:sreg_32_xm0_xexec, 0, implicit $exec +; CHECK-NEXT: [[Reg14]]:vreg_64 = REG_SEQUENCE killed [[Reg68]]:vgpr_32, %subreg.sub0, killed [[Reg70]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg11]]:sreg_32 = S_XOR_B32 $exec_lo, -1, implicit-def dead $scc +; CHECK-NEXT: S_BRANCH %bb.1 +; EMPTY: +; CHECK: bb.10.exit: +; CHECK-NEXT: ; predecessors: %bb.4, %bb.5 +; EMPTY: +; CHECK: [[Reg71:%[0-9]+]]:vreg_64 = PHI [[Reg35]]:vreg_64, %bb.4, [[Reg44]]:vreg_64, %bb.5 +; CHECK-NEXT: SI_END_CF killed [[Reg39]]:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: [[Reg72:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[Reg71]].sub0:vreg_64, implicit $exec +; CHECK-NEXT: [[Reg73:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[Reg71]].sub1:vreg_64, implicit $exec +; CHECK-NEXT: $sgpr0 = COPY killed [[Reg72]]:sreg_32_xm0 +; CHECK-NEXT: $sgpr1 = COPY killed [[Reg73]]:sreg_32_xm0 +; CHECK-NEXT: SI_RETURN_TO_EPILOG killed $sgpr0, killed $sgpr1 +; EMPTY: +; CHECK: # End machine code for function test. 
+; EMPTY: +; CHECK: Next-use distance of Register [[Reg4]] = 21.0 +; CHECK-NEXT: Next-use distance of Register [[Reg3]] = 19.0 +; CHECK-NEXT: Next-use distance of Register [[Reg2]] = 7.0 +; CHECK-NEXT: Next-use distance of Register [[Reg1]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg5]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg6]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg7]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg8]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg9]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg12]] = 6.0 +; CHECK-NEXT: Next-use distance of Register [[Reg16]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg17]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg18]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg19]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg20]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg21]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg22]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg23]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg24]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg25]] = 8.0 +; CHECK-NEXT: Next-use distance of Register [[Reg26]] = 6.0 +; CHECK-NEXT: Next-use distance of Register [[Reg27]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg29]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg30]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg31]] = 12.0 +; CHECK-NEXT: Next-use distance of Register [[Reg10]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg15]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg32]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg35]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg39]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg40]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg41]] = 2.0 +; CHECK-NEXT: Next-use distance of Register 
[[Reg42]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg43]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg44]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg45]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg33]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg46]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg38]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg49]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg50]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg51]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg37]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg52]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg34]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg28]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg36]] = 11.0 +; CHECK-NEXT: Next-use distance of Register [[Reg47]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg48]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg54]] = 11.0 +; CHECK-NEXT: Next-use distance of Register [[Reg55]] = 10.0 +; CHECK-NEXT: Next-use distance of Register [[Reg56]] = 10.0 +; CHECK-NEXT: Next-use distance of Register [[Reg57]] = 9.0 +; CHECK-NEXT: Next-use distance of Register [[Reg58]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg59]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg60]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg61]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg62]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg63]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg64]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg65]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg66]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg67]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg68]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg69]] = 1.0 +; CHECK-NEXT: Next-use 
distance of Register [[Reg70]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg14]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg11]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg71]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg72]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg73]] = 2.0 +entry: +; entry +; / \ +; bb1 bb2 +; / \ / \ +; bb3 bb4 bb5 +; \ | / +; exit + br i1 %cond, label %bb1, label %bb2 + +bb1: + %gep1 = getelementptr inbounds i64, ptr addrspace(3) %p, i64 0 + %ld1 = load i64, ptr addrspace(3) %gep1, align 2 + br i1 %cond, label %bb3, label %bb4 + +bb2: + %gep2 = getelementptr inbounds i64, ptr addrspace(3) %p, i64 1 + %ld2 = load i64, ptr addrspace(3) %gep2, align 8 + br i1 %cond, label %bb4, label %bb5 + +bb3: + %gep3 = getelementptr inbounds i64, ptr addrspace(3) %p, i64 3 + %ld3 = load i64, ptr addrspace(3) %gep3, align 4 + %add1 = add i64 %ld3, %val + br label %exit + +bb4: + %phi1 = phi i64 [ %ld1, %bb1 ], [ %ld2, %bb2] + %add2 = add i64 %phi1, %val + br label %exit + +bb5: + %gep4 = getelementptr inbounds i64, ptr addrspace(3) %p, i64 4 + %ld4 = load i64, ptr addrspace(3) %gep4, align 1 + %add3 = add i64 %ld4, %val + br label %exit + +exit: + %phi2 = phi i64 [ %add1, %bb3 ], [ %add2, %bb4 ], [ %add3, %bb5 ] + ret i64 %phi2 +} diff --git a/llvm/test/CodeGen/AMDGPU/test_ers_multiple_spills1.ll b/llvm/test/CodeGen/AMDGPU/test_ers_multiple_spills1.ll new file mode 100644 index 0000000000000..e155cc25fff04 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/test_ers_multiple_spills1.ll @@ -0,0 +1,1048 @@ +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -enable-next-use-analysis=true -verify-machineinstrs -dump-next-use-distance < %s 2>&1 | FileCheck %s + +@array2 = global [5 x i32] zeroinitializer, align 4 +@array3 = global [5 x i32] zeroinitializer, align 4 +@array4 = global [5 x i32] zeroinitializer, align 4 +@array5 = global [5 x i32] zeroinitializer, align 4 + +@array6 = global [5 x i32] zeroinitializer, 
 align 4 +@array7 = global [5 x i32] zeroinitializer, align 4 +@array8 = global [5 x i32] zeroinitializer, align 4 +@array9 = global [5 x i32] zeroinitializer, align 4 + +; bb.0.entry +; / | +; bb.3.bb2 | +; \ | +; bb.1.Flow6 +; / | +; bb.2.bb1 | +; \ | +; bb.4.bb3 +; / | +; bb.7.bb5 | +; \ | +; bb.5.Flow2 +; / | +; bb.6.bb4 | +; \ | +; bb.8.bb6 +; / | +; bb.11.bb8 | +; \ | +; bb.9.Flow +; / | +; bb.10.bb7 | +; \ | +; bb.12.Flow1 +; / | +; bb.13.bb9 | +; \ | +; bb.14.bb10 +; +define amdgpu_ps void @test(ptr addrspace(1) %p1, ptr addrspace(3) %p2, ptr addrspace(1) %p3, ptr addrspace(1) %p4, ptr addrspace(1) %p5, ptr addrspace(1) %p6, ptr addrspace(1) %p7, ptr addrspace(1) %p8, ptr addrspace(1) %p9, ptr addrspace(1) %p10) { +; CHECK-LABEL: # Machine code for function test: IsSSA, TracksLiveness +; CHECK-NEXT: Function Live Ins: $vgpr0 in [[Reg1:%[0-9]+]], $vgpr1 in [[Reg2:%[0-9]+]], $vgpr2 in [[Reg3:%[0-9]+]], $vgpr3 in [[Reg4:%[0-9]+]], $vgpr4 in [[Reg5:%[0-9]+]], $vgpr5 in [[Reg6:%[0-9]+]], $vgpr6 in [[Reg7:%[0-9]+]], $vgpr7 in [[Reg8:%[0-9]+]], $vgpr8 in [[Reg9:%[0-9]+]], $vgpr9 in [[Reg10:%[0-9]+]], $vgpr10 in [[Reg11:%[0-9]+]], $vgpr11 in [[Reg12:%[0-9]+]], $vgpr12 in [[Reg13:%[0-9]+]], $vgpr13 in [[Reg14:%[0-9]+]], $vgpr14 in [[Reg15:%[0-9]+]], $vgpr15 in [[Reg16:%[0-9]+]], $vgpr16 in [[Reg17:%[0-9]+]], $vgpr17 in [[Reg18:%[0-9]+]], $vgpr18 in [[Reg19:%[0-9]+]] +; EMPTY: +; CHECK: bb.0.entry: +; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.1(0x40000000); %bb.3(50.00%), %bb.1(50.00%) +; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18 +; CHECK-NEXT: [[Reg19]]:vgpr_32 = COPY killed $vgpr18 +; CHECK-NEXT: [[Reg18]]:vgpr_32 = COPY killed $vgpr17 +; CHECK-NEXT: [[Reg17]]:vgpr_32 = COPY killed $vgpr16 +; CHECK-NEXT: [[Reg16]]:vgpr_32 = COPY killed $vgpr15 +; CHECK-NEXT: [[Reg15]]:vgpr_32 = COPY killed $vgpr14 +; CHECK-NEXT: 
[[Reg14]]:vgpr_32 = COPY killed $vgpr13 +; CHECK-NEXT: [[Reg13]]:vgpr_32 = COPY killed $vgpr12 +; CHECK-NEXT: [[Reg12]]:vgpr_32 = COPY killed $vgpr11 +; CHECK-NEXT: [[Reg11]]:vgpr_32 = COPY killed $vgpr10 +; CHECK-NEXT: [[Reg10]]:vgpr_32 = COPY killed $vgpr9 +; CHECK-NEXT: [[Reg9]]:vgpr_32 = COPY killed $vgpr8 +; CHECK-NEXT: [[Reg8]]:vgpr_32 = COPY killed $vgpr7 +; CHECK-NEXT: [[Reg7]]:vgpr_32 = COPY killed $vgpr6 +; CHECK-NEXT: [[Reg6]]:vgpr_32 = COPY killed $vgpr5 +; CHECK-NEXT: [[Reg5]]:vgpr_32 = COPY killed $vgpr4 +; CHECK-NEXT: [[Reg4]]:vgpr_32 = COPY killed $vgpr3 +; CHECK-NEXT: [[Reg3]]:vgpr_32 = COPY killed $vgpr2 +; CHECK-NEXT: [[Reg2]]:vgpr_32 = COPY killed $vgpr1 +; CHECK-NEXT: [[Reg1]]:vgpr_32 = COPY killed $vgpr0 +; CHECK-NEXT: [[Reg20:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg14]]:vgpr_32, %subreg.sub0, killed [[Reg15]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg21:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg1]]:vgpr_32, %subreg.sub0, killed [[Reg2]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg22:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg8]]:vgpr_32, %subreg.sub0, killed [[Reg9]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg23:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg6]]:vgpr_32, %subreg.sub0, killed [[Reg7]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg24:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[Reg23]]:vreg_64, 0, 0, implicit $exec :: (load (s16) from %ir.p4, addrspace 1) +; CHECK-NEXT: [[Reg25:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[Reg23]]:vreg_64, 2, 0, implicit $exec :: (load (s16) from %ir.p4 + 2, addrspace 1) +; CHECK-NEXT: [[Reg26:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg25]]:vgpr_32, 16, killed [[Reg24]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg27:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[Reg22]]:vreg_64, 0, 0, implicit $exec :: (load (s32) from %ir.p5, align 8, addrspace 1) +; CHECK-NEXT: [[Reg28:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[Reg21]]:vreg_64, 0, 0, implicit $exec :: (load (s32) from %ir.p1, addrspace 1) +; CHECK-NEXT: 
[[Reg29:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg21]]:vreg_64, 12, 0, implicit $exec :: (load (s8) from %ir.gep1, addrspace 1) +; CHECK-NEXT: [[Reg30:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg21]]:vreg_64, 13, 0, implicit $exec :: (load (s8) from %ir.gep1 + 1, addrspace 1) +; CHECK-NEXT: [[Reg31:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg30]]:vgpr_32, 8, killed [[Reg29]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg32:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg21]]:vreg_64, 14, 0, implicit $exec :: (load (s8) from %ir.gep1 + 2, addrspace 1) +; CHECK-NEXT: [[Reg33:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg21]]:vreg_64, 15, 0, implicit $exec :: (load (s8) from %ir.gep1 + 3, addrspace 1) +; CHECK-NEXT: [[Reg34:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg33]]:vgpr_32, 8, killed [[Reg32]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg35:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[Reg26]]:vgpr_32, [[Reg27]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_DWORD [[Reg21]]:vreg_64, [[Reg35]]:vgpr_32, 0, 0, implicit $exec :: (store (s32) into %ir.p1, addrspace 1) +; CHECK-NEXT: [[Reg36:%[0-9]+]]:sreg_32 = V_CMP_LT_U32_e64 killed [[Reg27]]:vgpr_32, [[Reg35]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg37:%[0-9]+]]:sreg_32 = SI_IF killed [[Reg36]]:sreg_32, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.3 +; EMPTY: +; CHECK: bb.1.Flow6: +; CHECK-NEXT: ; predecessors: %bb.0, %bb.3 +; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000); %bb.2(50.00%), %bb.4(50.00%) +; EMPTY: +; CHECK: [[Reg38:%[0-9]+]]:vgpr_32 = PHI undef [[Reg39:%[0-9]+]]:vgpr_32, %bb.0, [[Reg40:%[0-9]+]]:vgpr_32, %bb.3 +; CHECK-NEXT: [[Reg41:%[0-9]+]]:vgpr_32 = PHI undef [[Reg39]]:vgpr_32, %bb.0, [[Reg42:%[0-9]+]]:vgpr_32, %bb.3 +; CHECK-NEXT: [[Reg43:%[0-9]+]]:vgpr_32 = PHI undef [[Reg39]]:vgpr_32, %bb.0, [[Reg44:%[0-9]+]]:vgpr_32, %bb.3 +; CHECK-NEXT: [[Reg45:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg4]]:vgpr_32, %subreg.sub0, killed 
[[Reg5]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg46:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg34]]:vgpr_32, 16, killed [[Reg31]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg47:%[0-9]+]]:sreg_32 = SI_ELSE killed [[Reg37]]:sreg_32, %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.2 +; EMPTY: +; CHECK: bb.2.bb1: +; CHECK-NEXT: ; predecessors: %bb.1 +; CHECK-NEXT: successors: %bb.4(0x80000000); %bb.4(100.00%) +; EMPTY: +; CHECK: [[Reg48:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[Reg45]]:vreg_64, 0, 0, implicit $exec :: (load (s128) from %ir.p3, align 4, addrspace 1) +; CHECK-NEXT: [[Reg49:%[0-9]+]]:vgpr_32 = COPY [[Reg48]].sub2:vreg_128 +; CHECK-NEXT: [[Reg50:%[0-9]+]]:vgpr_32 = COPY [[Reg48]].sub0:vreg_128 +; CHECK-NEXT: [[Reg51:%[0-9]+]]:vgpr_32 = COPY [[Reg48]].sub1:vreg_128 +; CHECK-NEXT: [[Reg52:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[Reg48]].sub0:vreg_128, [[Reg48]].sub1:vreg_128, 0, implicit $exec +; CHECK-NEXT: [[Reg53:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[Reg52]]:vgpr_32, killed [[Reg48]].sub2:vreg_128, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_DWORD [[Reg45]]:vreg_64, killed [[Reg53]]:vgpr_32, 0, 0, implicit $exec :: (store (s32) into %ir.p3, addrspace 1) +; CHECK-NEXT: GLOBAL_STORE_DWORD [[Reg20]]:vreg_64, killed [[Reg52]]:vgpr_32, 0, 0, implicit $exec :: (store (s32) into %ir.p8, addrspace 1) +; CHECK-NEXT: S_BRANCH %bb.4 +; EMPTY: +; CHECK: bb.3.bb2: +; CHECK-NEXT: ; predecessors: %bb.0 +; CHECK-NEXT: successors: %bb.1(0x80000000); %bb.1(100.00%) +; EMPTY: +; CHECK: [[Reg54:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[Reg3]]:vgpr_32, 0, 0, implicit $exec :: (load (s8) from %ir.p2, addrspace 3) +; CHECK-NEXT: [[Reg55:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[Reg3]]:vgpr_32, 1, 0, implicit $exec :: (load (s8) from %ir.p2 + 1, addrspace 3) +; CHECK-NEXT: [[Reg56:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[Reg3]]:vgpr_32, 2, 0, implicit $exec :: (load (s8) from %ir.p2 + 2, addrspace 3) +; CHECK-NEXT: 
[[Reg57:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[Reg3]]:vgpr_32, 3, 0, implicit $exec :: (load (s8) from %ir.p2 + 3, addrspace 3) +; CHECK-NEXT: [[Reg58:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg55]]:vgpr_32, 8, killed [[Reg54]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg59:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg57]]:vgpr_32, 8, killed [[Reg56]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg60:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg59]]:vgpr_32, 16, killed [[Reg58]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg61:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg20]]:vreg_64, 4, 0, implicit $exec :: (load (s8) from %ir.p8 + 4, addrspace 1) +; CHECK-NEXT: [[Reg62:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg20]]:vreg_64, 5, 0, implicit $exec :: (load (s8) from %ir.p8 + 5, addrspace 1) +; CHECK-NEXT: [[Reg63:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg62]]:vgpr_32, 8, killed [[Reg61]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg64:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg20]]:vreg_64, 6, 0, implicit $exec :: (load (s8) from %ir.p8 + 6, addrspace 1) +; CHECK-NEXT: [[Reg65:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg20]]:vreg_64, 7, 0, implicit $exec :: (load (s8) from %ir.p8 + 7, addrspace 1) +; CHECK-NEXT: [[Reg66:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg65]]:vgpr_32, 8, killed [[Reg64]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg42]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg66]]:vgpr_32, 16, killed [[Reg63]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg67:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg20]]:vreg_64, 8, 0, implicit $exec :: (load (s8) from %ir.p8 + 8, addrspace 1) +; CHECK-NEXT: [[Reg68:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg20]]:vreg_64, 9, 0, implicit $exec :: (load (s8) from %ir.p8 + 9, addrspace 1) +; CHECK-NEXT: [[Reg69:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg68]]:vgpr_32, 8, killed [[Reg67]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg70:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg20]]:vreg_64, 10, 0, 
implicit $exec :: (load (s8) from %ir.p8 + 10, addrspace 1) +; CHECK-NEXT: [[Reg71:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg20]]:vreg_64, 11, 0, implicit $exec :: (load (s8) from %ir.p8 + 11, addrspace 1) +; CHECK-NEXT: [[Reg72:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg71]]:vgpr_32, 8, killed [[Reg70]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg40]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg72]]:vgpr_32, 16, killed [[Reg69]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg73:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[Reg40]]:vgpr_32, %subreg.sub0, undef [[Reg74:%[0-9]+]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg75:%[0-9]+]]:vreg_64, $sgpr_null = V_MAD_U64_U32_e64 killed [[Reg60]]:vgpr_32, [[Reg42]]:vgpr_32, killed [[Reg73]]:vreg_64, 0, implicit $exec +; CHECK-NEXT: [[Reg44]]:vgpr_32 = V_SUB_U32_e64 killed [[Reg75]].sub0:vreg_64, [[Reg28]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.1 +; EMPTY: +; CHECK: bb.4.bb3: +; CHECK-NEXT: ; predecessors: %bb.1, %bb.2 +; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.5(0x40000000); %bb.7(50.00%), %bb.5(50.00%) +; EMPTY: +; CHECK: [[Reg76:%[0-9]+]]:vgpr_32 = PHI [[Reg43]]:vgpr_32, %bb.1, [[Reg50]]:vgpr_32, %bb.2 +; CHECK-NEXT: [[Reg77:%[0-9]+]]:vgpr_32 = PHI [[Reg35]]:vgpr_32, %bb.1, [[Reg50]]:vgpr_32, %bb.2 +; CHECK-NEXT: [[Reg78:%[0-9]+]]:vgpr_32 = PHI [[Reg41]]:vgpr_32, %bb.1, [[Reg51]]:vgpr_32, %bb.2 +; CHECK-NEXT: [[Reg79:%[0-9]+]]:vgpr_32 = PHI [[Reg38]]:vgpr_32, %bb.1, [[Reg49]]:vgpr_32, %bb.2 +; CHECK-NEXT: SI_END_CF killed [[Reg47]]:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: [[Reg80:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg26]]:vgpr_32, %subreg.sub0, undef [[Reg81:%[0-9]+]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg82:%[0-9]+]]:vreg_64, $sgpr_null = V_MAD_U64_U32_e64 [[Reg76]]:vgpr_32, [[Reg28]]:vgpr_32, killed [[Reg80]]:vreg_64, 0, implicit $exec +; CHECK-NEXT: [[Reg83:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[Reg77]]:vgpr_32, %subreg.sub0, undef 
[[Reg84:%[0-9]+]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg85:%[0-9]+]]:vreg_64, $sgpr_null = V_MAD_U64_U32_e64 killed [[Reg82]].sub0:vreg_64, killed [[Reg35]]:vgpr_32, killed [[Reg83]]:vreg_64, 0, implicit $exec +; CHECK-NEXT: [[Reg86:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[Reg85]].sub0:vreg_64, [[Reg78]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg87:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg85]].sub0:vreg_64, [[Reg79]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg88:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg76]]:vgpr_32, [[Reg78]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg89:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @array2, target-flags(amdgpu-gotprel32-hi) @array2, implicit-def dead $scc +; CHECK-NEXT: [[Reg90:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[Reg89]]:sreg_64, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) +; CHECK-NEXT: [[Reg91:%[0-9]+]]:vreg_64 = COPY killed [[Reg90]]:sreg_64_xexec +; CHECK-NEXT: [[Reg92:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[Reg91]]:vreg_64, 20, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array2, i64 20)`) +; CHECK-NEXT: [[Reg93:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[Reg92]]:vgpr_32, [[Reg79]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg94:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @array4, target-flags(amdgpu-gotprel32-hi) @array4, implicit-def dead $scc +; CHECK-NEXT: [[Reg95:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[Reg94]]:sreg_64, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) +; CHECK-NEXT: [[Reg96:%[0-9]+]]:vreg_64 = COPY killed [[Reg95]]:sreg_64_xexec +; CHECK-NEXT: FLAT_STORE_DWORD [[Reg96]]:vreg_64, [[Reg93]]:vgpr_32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr getelementptr inbounds nuw (i8, ptr @array4, i64 4)`) +; CHECK-NEXT: [[Reg97:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg45]]:vreg_64, 20, 0, 
implicit $exec :: (load (s8) from %ir.p3 + 20, addrspace 1) +; CHECK-NEXT: [[Reg98:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg45]]:vreg_64, 21, 0, implicit $exec :: (load (s8) from %ir.p3 + 21, addrspace 1) +; CHECK-NEXT: [[Reg99:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg98]]:vgpr_32, 8, killed [[Reg97]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg100:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg45]]:vreg_64, 22, 0, implicit $exec :: (load (s8) from %ir.p3 + 22, addrspace 1) +; CHECK-NEXT: [[Reg101:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg45]]:vreg_64, 23, 0, implicit $exec :: (load (s8) from %ir.p3 + 23, addrspace 1) +; CHECK-NEXT: [[Reg102:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg101]]:vgpr_32, 8, killed [[Reg100]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg103:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg102]]:vgpr_32, 16, killed [[Reg99]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg104:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg45]]:vreg_64, 0, 0, implicit $exec :: (load (s8) from %ir.p3, addrspace 1) +; CHECK-NEXT: [[Reg105:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg45]]:vreg_64, 1, 0, implicit $exec :: (load (s8) from %ir.p3 + 1, addrspace 1) +; CHECK-NEXT: [[Reg106:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg105]]:vgpr_32, 8, killed [[Reg104]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg107:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg45]]:vreg_64, 2, 0, implicit $exec :: (load (s8) from %ir.p3 + 2, addrspace 1) +; CHECK-NEXT: [[Reg108:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg45]]:vreg_64, 3, 0, implicit $exec :: (load (s8) from %ir.p3 + 3, addrspace 1) +; CHECK-NEXT: [[Reg109:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg108]]:vgpr_32, 8, killed [[Reg107]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg110:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg109]]:vgpr_32, 16, killed [[Reg106]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg111:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg45]]:vreg_64, 28, 0, implicit $exec :: (load 
(s8) from %ir.p3 + 28, addrspace 1) +; CHECK-NEXT: [[Reg112:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg45]]:vreg_64, 29, 0, implicit $exec :: (load (s8) from %ir.p3 + 29, addrspace 1) +; CHECK-NEXT: [[Reg113:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg112]]:vgpr_32, 8, killed [[Reg111]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg114:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg45]]:vreg_64, 30, 0, implicit $exec :: (load (s8) from %ir.p3 + 30, addrspace 1) +; CHECK-NEXT: [[Reg115:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg45]]:vreg_64, 31, 0, implicit $exec :: (load (s8) from %ir.p3 + 31, addrspace 1) +; CHECK-NEXT: [[Reg116:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg115]]:vgpr_32, 8, killed [[Reg114]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg117:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg116]]:vgpr_32, 16, killed [[Reg113]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg118:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg45]]:vreg_64, 24, 0, implicit $exec :: (load (s8) from %ir.p3 + 24, addrspace 1) +; CHECK-NEXT: [[Reg119:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg45]]:vreg_64, 25, 0, implicit $exec :: (load (s8) from %ir.p3 + 25, addrspace 1) +; CHECK-NEXT: [[Reg120:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg119]]:vgpr_32, 8, killed [[Reg118]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg121:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg45]]:vreg_64, 26, 0, implicit $exec :: (load (s8) from %ir.p3 + 26, addrspace 1) +; CHECK-NEXT: [[Reg122:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE killed [[Reg45]]:vreg_64, 27, 0, implicit $exec :: (load (s8) from %ir.p3 + 27, addrspace 1) +; CHECK-NEXT: [[Reg123:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg122]]:vgpr_32, 8, killed [[Reg121]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg124:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg123]]:vgpr_32, 16, killed [[Reg120]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg125:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[Reg91]]:vreg_64, 28, 0, implicit $exec, implicit 
$flat_scr :: (load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array2, i64 28)`) +; CHECK-NEXT: [[Reg126:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[Reg125]]:vgpr_32, [[Reg88]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg127:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[Reg93]]:vgpr_32, [[Reg78]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg128:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[Reg79]]:vgpr_32, [[Reg92]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg129:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[Reg128]]:vgpr_32, [[Reg126]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg130:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[Reg127]]:vgpr_32, [[Reg129]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg131:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[Reg130]]:vgpr_32, [[Reg124]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg132:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @array3, target-flags(amdgpu-gotprel32-hi) @array3, implicit-def dead $scc +; CHECK-NEXT: [[Reg133:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[Reg132]]:sreg_64, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) +; CHECK-NEXT: [[Reg134:%[0-9]+]]:vreg_64 = COPY killed [[Reg133]]:sreg_64_xexec +; CHECK-NEXT: FLAT_STORE_DWORD [[Reg134]]:vreg_64, [[Reg131]]:vgpr_32, 68, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr getelementptr inbounds nuw (i8, ptr @array3, i64 68)`) +; CHECK-NEXT: [[Reg135:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @array5, target-flags(amdgpu-gotprel32-hi) @array5, implicit-def dead $scc +; CHECK-NEXT: [[Reg136:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[Reg135]]:sreg_64, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) +; CHECK-NEXT: [[Reg137:%[0-9]+]]:vreg_64 = COPY killed [[Reg136]]:sreg_64_xexec +; CHECK-NEXT: [[Reg138:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[Reg137]]:vreg_64, 20, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr getelementptr inbounds nuw (i8, ptr 
@array5, i64 20)`) +; CHECK-NEXT: [[Reg139:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg138]]:vgpr_32, [[Reg117]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg140:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[Reg139]]:vgpr_32, killed [[Reg77]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg141:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[Reg92]]:vgpr_32, [[Reg117]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg142:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[Reg79]]:vgpr_32, killed [[Reg78]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg143:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[Reg140]]:vgpr_32, [[Reg127]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg144:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[Reg139]]:vgpr_32, [[Reg130]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg145:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[Reg117]]:vgpr_32, killed [[Reg79]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg146:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[Reg141]]:vgpr_32, [[Reg125]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg147:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @array6, target-flags(amdgpu-gotprel32-hi) @array6, implicit-def dead $scc +; CHECK-NEXT: [[Reg148:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[Reg147]]:sreg_64, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) +; CHECK-NEXT: [[Reg149:%[0-9]+]]:vreg_64 = COPY killed [[Reg148]]:sreg_64_xexec +; CHECK-NEXT: [[Reg150:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[Reg149]]:vreg_64, 44, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array6, i64 44)`) +; CHECK-NEXT: [[Reg151:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg150]]:vgpr_32, [[Reg141]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg152:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @array7, target-flags(amdgpu-gotprel32-hi) @array7, implicit-def dead $scc +; CHECK-NEXT: [[Reg153:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[Reg152]]:sreg_64, 0, 0 :: 
(dereferenceable invariant load (s64) from got, addrspace 4) +; CHECK-NEXT: [[Reg154:%[0-9]+]]:vreg_64 = COPY killed [[Reg153]]:sreg_64_xexec +; CHECK-NEXT: [[Reg155:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[Reg154]]:vreg_64, 20, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array7, i64 20)`) +; CHECK-NEXT: [[Reg156:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @array8, target-flags(amdgpu-gotprel32-hi) @array8, implicit-def dead $scc +; CHECK-NEXT: [[Reg157:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[Reg156]]:sreg_64, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) +; CHECK-NEXT: [[Reg158:%[0-9]+]]:vreg_64 = COPY killed [[Reg157]]:sreg_64_xexec +; CHECK-NEXT: [[Reg159:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[Reg158]]:vreg_64, 44, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array8, i64 44)`, align 8) +; CHECK-NEXT: [[Reg160:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @array9, target-flags(amdgpu-gotprel32-hi) @array9, implicit-def dead $scc +; CHECK-NEXT: [[Reg161:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[Reg160]]:sreg_64, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) +; CHECK-NEXT: [[Reg162:%[0-9]+]]:vreg_64 = COPY killed [[Reg161]]:sreg_64_xexec +; CHECK-NEXT: [[Reg163:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD killed [[Reg162]]:vreg_64, 24, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array9, i64 24)`) +; CHECK-NEXT: [[Reg164:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[Reg163]]:vgpr_32, [[Reg131]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg165:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[Reg91]]:vreg_64, 84, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array2, i64 84)`) +; CHECK-NEXT: [[Reg166:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed 
[[Reg165]]:vgpr_32, killed [[Reg130]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg167:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[Reg134]]:vreg_64, 80, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array3, i64 80)`) +; CHECK-NEXT: [[Reg168:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[Reg96]]:vreg_64, 80, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array4, i64 80)`, align 8) +; CHECK-NEXT: [[Reg169:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[Reg137]]:vreg_64, 88, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array5, i64 88)`) +; CHECK-NEXT: [[Reg170:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[Reg169]]:vgpr_32, killed [[Reg127]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg171:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD killed [[Reg158]]:vreg_64, 20, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array8, i64 20)`) +; CHECK-NEXT: [[Reg172:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[Reg171]]:vgpr_32, killed [[Reg126]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg173:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[Reg96]]:vreg_64, 8, 0, implicit $exec, implicit $flat_scr :: (dereferenceable load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array4, i64 8)`) +; CHECK-NEXT: [[Reg174:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD killed [[Reg91]]:vreg_64, 12, 0, implicit $exec, implicit $flat_scr :: (dereferenceable load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array2, i64 12)`) +; CHECK-NEXT: [[Reg175:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[Reg134]]:vreg_64, 4, 0, implicit $exec, implicit $flat_scr :: (dereferenceable load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array3, i64 4)`) +; CHECK-NEXT: [[Reg176:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD killed [[Reg137]]:vreg_64, 4, 0, implicit $exec, implicit $flat_scr :: (dereferenceable load (s32) from `ptr getelementptr inbounds nuw (i8, ptr 
@array5, i64 4)`) +; CHECK-NEXT: [[Reg177:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[Reg176]]:vgpr_32, [[Reg139]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg178:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg23]]:vreg_64, 16, 0, implicit $exec :: (load (s8) from %ir.p4 + 16, addrspace 1) +; CHECK-NEXT: [[Reg179:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg23]]:vreg_64, 17, 0, implicit $exec :: (load (s8) from %ir.p4 + 17, addrspace 1) +; CHECK-NEXT: [[Reg180:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg179]]:vgpr_32, 8, killed [[Reg178]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg181:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg23]]:vreg_64, 18, 0, implicit $exec :: (load (s8) from %ir.p4 + 18, addrspace 1) +; CHECK-NEXT: [[Reg182:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg23]]:vreg_64, 19, 0, implicit $exec :: (load (s8) from %ir.p4 + 19, addrspace 1) +; CHECK-NEXT: [[Reg183:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg182]]:vgpr_32, 8, killed [[Reg181]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg184:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg183]]:vgpr_32, 16, killed [[Reg180]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg185:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg23]]:vreg_64, 12, 0, implicit $exec :: (load (s8) from %ir.p4 + 12, addrspace 1) +; CHECK-NEXT: [[Reg186:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg23]]:vreg_64, 13, 0, implicit $exec :: (load (s8) from %ir.p4 + 13, addrspace 1) +; CHECK-NEXT: [[Reg187:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg186]]:vgpr_32, 8, killed [[Reg185]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg188:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg23]]:vreg_64, 14, 0, implicit $exec :: (load (s8) from %ir.p4 + 14, addrspace 1) +; CHECK-NEXT: [[Reg189:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg23]]:vreg_64, 15, 0, implicit $exec :: (load (s8) from %ir.p4 + 15, addrspace 1) +; CHECK-NEXT: [[Reg190:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg189]]:vgpr_32, 8, killed [[Reg188]]:vgpr_32, implicit $exec +; 
CHECK-NEXT: [[Reg191:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg190]]:vgpr_32, 16, killed [[Reg187]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg192:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg191]]:vgpr_32, killed [[Reg141]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg193:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 [[Reg184]]:vgpr_32, [[Reg140]]:vgpr_32, killed [[Reg192]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg194:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[Reg177]]:vgpr_32, 0, 0, implicit $mode, implicit $exec +; CHECK-NEXT: [[Reg195:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_IFLAG_F32_e64 0, killed [[Reg194]]:vgpr_32, 0, 0, implicit $mode, implicit $exec +; CHECK-NEXT: [[Reg196:%[0-9]+]]:vgpr_32 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_MUL_F32_e64 0, 1333788670, 0, killed [[Reg195]]:vgpr_32, 0, 0, implicit $mode, implicit $exec +; CHECK-NEXT: [[Reg197:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, killed [[Reg196]]:vgpr_32, 0, 0, implicit $mode, implicit $exec +; CHECK-NEXT: [[Reg198:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 0, [[Reg177]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg199:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg198]]:vgpr_32, [[Reg197]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg200:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[Reg197]]:vgpr_32, killed [[Reg199]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg201:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[Reg197]]:vgpr_32, killed [[Reg200]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg202:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[Reg193]]:vgpr_32, killed [[Reg201]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg203:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[Reg202]]:vgpr_32, [[Reg177]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg204:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 killed [[Reg193]]:vgpr_32, killed [[Reg203]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg205:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_GE_U32_e64 [[Reg204]]:vgpr_32, [[Reg177]]:vgpr_32, implicit $exec +; CHECK-NEXT: 
[[Reg206:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 1, [[Reg202]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg207:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, killed [[Reg202]]:vgpr_32, 0, killed [[Reg206]]:vgpr_32, [[Reg205]]:sreg_32_xm0_xexec, implicit $exec +; CHECK-NEXT: [[Reg208:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[Reg204]]:vgpr_32, [[Reg177]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg209:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, killed [[Reg204]]:vgpr_32, 0, killed [[Reg208]]:vgpr_32, killed [[Reg205]]:sreg_32_xm0_xexec, implicit $exec +; CHECK-NEXT: [[Reg210:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_GE_U32_e64 killed [[Reg209]]:vgpr_32, killed [[Reg177]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg211:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 1, [[Reg207]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg212:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, killed [[Reg207]]:vgpr_32, 0, killed [[Reg211]]:vgpr_32, killed [[Reg210]]:sreg_32_xm0_xexec, implicit $exec +; CHECK-NEXT: [[Reg213:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 killed [[Reg175]]:vgpr_32, [[Reg93]]:vgpr_32, killed [[Reg212]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg214:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg213]]:vgpr_32, killed [[Reg169]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg215:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 killed [[Reg214]]:vgpr_32, killed [[Reg163]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg216:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 killed [[Reg174]]:vgpr_32, [[Reg131]]:vgpr_32, killed [[Reg215]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg217:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg86]]:vgpr_32, %subreg.sub0, undef [[Reg218:%[0-9]+]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg219:%[0-9]+]]:vreg_64, $sgpr_null = V_MAD_U64_U32_e64 killed [[Reg216]]:vgpr_32, killed [[Reg87]]:vgpr_32, killed [[Reg217]]:vreg_64, 0, implicit $exec +; CHECK-NEXT: [[Reg220:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 killed [[Reg219]].sub0:vreg_64, killed [[Reg124]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: 
[[Reg221:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[Reg117]]:vgpr_32, 0, 0, implicit $mode, implicit $exec +; CHECK-NEXT: [[Reg222:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_IFLAG_F32_e64 0, killed [[Reg221]]:vgpr_32, 0, 0, implicit $mode, implicit $exec +; CHECK-NEXT: [[Reg223:%[0-9]+]]:vgpr_32 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_MUL_F32_e64 0, 1333788670, 0, killed [[Reg222]]:vgpr_32, 0, 0, implicit $mode, implicit $exec +; CHECK-NEXT: [[Reg224:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, killed [[Reg223]]:vgpr_32, 0, 0, implicit $mode, implicit $exec +; CHECK-NEXT: [[Reg225:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 0, [[Reg117]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg226:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg225]]:vgpr_32, [[Reg224]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg227:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[Reg224]]:vgpr_32, killed [[Reg226]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg228:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[Reg224]]:vgpr_32, killed [[Reg227]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg229:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[Reg220]]:vgpr_32, killed [[Reg228]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg230:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[Reg229]]:vgpr_32, [[Reg117]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg231:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 killed [[Reg220]]:vgpr_32, killed [[Reg230]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg232:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_GE_U32_e64 [[Reg231]]:vgpr_32, [[Reg117]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg233:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 1, [[Reg229]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg234:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, killed [[Reg229]]:vgpr_32, 0, killed [[Reg233]]:vgpr_32, [[Reg232]]:sreg_32_xm0_xexec, implicit $exec +; CHECK-NEXT: [[Reg235:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[Reg231]]:vgpr_32, [[Reg117]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg236:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, 
killed [[Reg231]]:vgpr_32, 0, killed [[Reg235]]:vgpr_32, killed [[Reg232]]:sreg_32_xm0_xexec, implicit $exec +; CHECK-NEXT: [[Reg237:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_GE_U32_e64 killed [[Reg236]]:vgpr_32, killed [[Reg117]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg238:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 1, [[Reg234]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg239:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, killed [[Reg234]]:vgpr_32, 0, killed [[Reg238]]:vgpr_32, killed [[Reg237]]:sreg_32_xm0_xexec, implicit $exec +; CHECK-NEXT: DS_WRITE_B8_D16_HI [[Reg3]]:vgpr_32, [[Reg239]]:vgpr_32, 2, 0, implicit $exec :: (store (s8) into %ir.p2 + 2, addrspace 3) +; CHECK-NEXT: DS_WRITE_B8_gfx9 [[Reg3]]:vgpr_32, [[Reg239]]:vgpr_32, 0, 0, implicit $exec :: (store (s8) into %ir.p2, addrspace 3) +; CHECK-NEXT: [[Reg240:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 24, [[Reg239]]:vgpr_32, implicit $exec +; CHECK-NEXT: DS_WRITE_B8_gfx9 [[Reg3]]:vgpr_32, killed [[Reg240]]:vgpr_32, 3, 0, implicit $exec :: (store (s8) into %ir.p2 + 3, addrspace 3) +; CHECK-NEXT: [[Reg241:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 8, [[Reg239]]:vgpr_32, implicit $exec +; CHECK-NEXT: DS_WRITE_B8_gfx9 killed [[Reg3]]:vgpr_32, killed [[Reg241]]:vgpr_32, 1, 0, implicit $exec :: (store (s8) into %ir.p2 + 1, addrspace 3) +; CHECK-NEXT: [[Reg242:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 killed [[Reg239]]:vgpr_32, killed [[Reg171]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg243:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[Reg159]]:vgpr_32, %subreg.sub0, undef [[Reg244:%[0-9]+]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg245:%[0-9]+]]:vreg_64, $sgpr_null = V_MAD_U64_U32_e64 killed [[Reg242]]:vgpr_32, [[Reg184]]:vgpr_32, killed [[Reg243]]:vreg_64, 0, implicit $exec +; CHECK-NEXT: [[Reg246:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[Reg92]]:vgpr_32, %subreg.sub0, undef [[Reg247:%[0-9]+]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg248:%[0-9]+]]:vreg_64, $sgpr_null = V_MAD_U64_U32_e64 killed [[Reg245]].sub0:vreg_64, killed [[Reg125]]:vgpr_32, killed 
[[Reg246]]:vreg_64, 0, implicit $exec +; CHECK-NEXT: [[Reg249:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[Reg248]].sub0:vreg_64, [[Reg88]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg250:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg249]]:vgpr_32, %subreg.sub0, undef [[Reg251:%[0-9]+]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg252:%[0-9]+]]:vreg_64, $sgpr_null = V_MAD_U64_U32_e64 killed [[Reg173]]:vgpr_32, [[Reg93]]:vgpr_32, killed [[Reg250]]:vreg_64, 0, implicit $exec +; CHECK-NEXT: [[Reg253:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg252]].sub0:vreg_64, killed [[Reg172]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg254:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[Reg170]]:vgpr_32, 0, 0, implicit $mode, implicit $exec +; CHECK-NEXT: [[Reg255:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_IFLAG_F32_e64 0, killed [[Reg254]]:vgpr_32, 0, 0, implicit $mode, implicit $exec +; CHECK-NEXT: [[Reg256:%[0-9]+]]:vgpr_32 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_MUL_F32_e64 0, 1333788670, 0, killed [[Reg255]]:vgpr_32, 0, 0, implicit $mode, implicit $exec +; CHECK-NEXT: [[Reg257:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, killed [[Reg256]]:vgpr_32, 0, 0, implicit $mode, implicit $exec +; CHECK-NEXT: [[Reg258:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 0, [[Reg170]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg259:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg258]]:vgpr_32, [[Reg257]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg260:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[Reg257]]:vgpr_32, killed [[Reg259]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg261:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[Reg257]]:vgpr_32, killed [[Reg260]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg262:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[Reg253]]:vgpr_32, killed [[Reg261]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg263:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[Reg262]]:vgpr_32, [[Reg170]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg264:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 killed [[Reg253]]:vgpr_32, 
killed [[Reg263]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg265:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_GE_U32_e64 [[Reg264]]:vgpr_32, [[Reg170]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg266:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 1, [[Reg262]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg267:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, killed [[Reg262]]:vgpr_32, 0, killed [[Reg266]]:vgpr_32, [[Reg265]]:sreg_32_xm0_xexec, implicit $exec +; CHECK-NEXT: [[Reg268:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[Reg264]]:vgpr_32, [[Reg170]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg269:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, killed [[Reg264]]:vgpr_32, 0, killed [[Reg268]]:vgpr_32, killed [[Reg265]]:sreg_32_xm0_xexec, implicit $exec +; CHECK-NEXT: [[Reg270:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_GE_U32_e64 killed [[Reg269]]:vgpr_32, killed [[Reg170]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg271:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 1, [[Reg267]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg272:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, killed [[Reg267]]:vgpr_32, 0, killed [[Reg271]]:vgpr_32, killed [[Reg270]]:sreg_32_xm0_xexec, implicit $exec +; CHECK-NEXT: [[Reg273:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 killed [[Reg168]]:vgpr_32, killed [[Reg128]]:vgpr_32, killed [[Reg272]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg274:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 killed [[Reg129]]:vgpr_32, killed [[Reg167]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg275:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 killed [[Reg273]]:vgpr_32, killed [[Reg274]]:vgpr_32, killed [[Reg166]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg276:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg275]]:vgpr_32, killed [[Reg164]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg277:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 killed [[Reg159]]:vgpr_32, killed [[Reg139]]:vgpr_32, killed [[Reg276]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg278:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 killed [[Reg140]]:vgpr_32, killed [[Reg155]]:vgpr_32, 0, implicit $exec +; 
CHECK-NEXT: [[Reg279:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 killed [[Reg277]]:vgpr_32, killed [[Reg278]]:vgpr_32, killed [[Reg151]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg280:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg279]]:vgpr_32, [[Reg93]]:vgpr_32, implicit $exec +; CHECK-NEXT: FLAT_STORE_DWORD killed [[Reg154]]:vreg_64, [[Reg280]]:vgpr_32, 68, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr getelementptr inbounds nuw (i8, ptr @array7, i64 68)`) +; CHECK-NEXT: [[Reg281:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 killed [[Reg92]]:vgpr_32, killed [[Reg131]]:vgpr_32, killed [[Reg280]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg282:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[Reg146]]:vgpr_32, 0, 0, implicit $mode, implicit $exec +; CHECK-NEXT: [[Reg283:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_IFLAG_F32_e64 0, killed [[Reg282]]:vgpr_32, 0, 0, implicit $mode, implicit $exec +; CHECK-NEXT: [[Reg284:%[0-9]+]]:vgpr_32 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_MUL_F32_e64 0, 1333788670, 0, killed [[Reg283]]:vgpr_32, 0, 0, implicit $mode, implicit $exec +; CHECK-NEXT: [[Reg285:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, killed [[Reg284]]:vgpr_32, 0, 0, implicit $mode, implicit $exec +; CHECK-NEXT: [[Reg286:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 0, [[Reg146]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg287:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg286]]:vgpr_32, [[Reg285]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg288:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[Reg285]]:vgpr_32, killed [[Reg287]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg289:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[Reg285]]:vgpr_32, killed [[Reg288]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg290:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[Reg281]]:vgpr_32, killed [[Reg289]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg291:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[Reg290]]:vgpr_32, [[Reg146]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg292:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 
killed [[Reg281]]:vgpr_32, killed [[Reg291]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg293:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_GE_U32_e64 [[Reg292]]:vgpr_32, [[Reg146]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg294:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 1, [[Reg290]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg295:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, killed [[Reg290]]:vgpr_32, 0, killed [[Reg294]]:vgpr_32, [[Reg293]]:sreg_32_xm0_xexec, implicit $exec +; CHECK-NEXT: [[Reg296:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[Reg292]]:vgpr_32, [[Reg146]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg297:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, killed [[Reg292]]:vgpr_32, 0, killed [[Reg296]]:vgpr_32, killed [[Reg293]]:sreg_32_xm0_xexec, implicit $exec +; CHECK-NEXT: [[Reg298:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_GE_U32_e64 killed [[Reg297]]:vgpr_32, killed [[Reg146]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg299:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 1, [[Reg295]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg300:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, killed [[Reg295]]:vgpr_32, 0, killed [[Reg299]]:vgpr_32, killed [[Reg298]]:sreg_32_xm0_xexec, implicit $exec +; CHECK-NEXT: [[Reg301:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg300]]:vgpr_32, killed [[Reg145]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg302:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 killed [[Reg301]]:vgpr_32, killed [[Reg144]]:vgpr_32, killed [[Reg143]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg303:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg302]]:vgpr_32, [[Reg142]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg304:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg303]]:vgpr_32, [[Reg46]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg305:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD killed [[Reg134]]:vreg_64, 84, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array3, i64 84)`) +; CHECK-NEXT: [[Reg306:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[Reg305]]:vgpr_32, killed 
[[Reg184]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg307:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 killed [[Reg306]]:vgpr_32, killed [[Reg88]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg308:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg304]]:vgpr_32, %subreg.sub0, killed [[Reg307]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: FLAT_STORE_DWORDX2 killed [[Reg96]]:vreg_64, killed [[Reg308]]:vreg_64, 76, 0, implicit $exec, implicit $flat_scr :: (store (s64) into `ptr getelementptr inbounds nuw (i8, ptr @array4, i64 76)`, align 4) +; CHECK-NEXT: [[Reg309:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD killed [[Reg149]]:vreg_64, 28, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array6, i64 28)`) +; CHECK-NEXT: [[Reg310:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[Reg309]]:vgpr_32, [[Reg110]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg311:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 killed [[Reg103]]:vgpr_32, killed [[Reg93]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg312:%[0-9]+]]:vgpr_32, [[Reg313:%[0-9]+]]:sreg_32 = V_ADD_CO_U32_e64 [[Reg310]]:vgpr_32, [[Reg311]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg314:%[0-9]+]]:sreg_32 = S_XOR_B32 [[Reg313]]:sreg_32, -1, implicit-def dead $scc +; CHECK-NEXT: [[Reg315:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[Reg312]]:vgpr_32, killed [[Reg142]]:vgpr_32, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_SHORT_D16_HI [[Reg20]]:vreg_64, [[Reg315]]:vgpr_32, 2, 0, implicit $exec :: (store (s16) into %ir.p8 + 2, addrspace 1) +; CHECK-NEXT: GLOBAL_STORE_SHORT killed [[Reg20]]:vreg_64, [[Reg315]]:vgpr_32, 0, 0, implicit $exec :: (store (s16) into %ir.p8, addrspace 1) +; CHECK-NEXT: [[Reg316:%[0-9]+]]:sreg_32 = SI_IF killed [[Reg314]]:sreg_32, %bb.5, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.7 +; EMPTY: +; CHECK: bb.5.Flow5: +; CHECK-NEXT: ; predecessors: %bb.4, %bb.7 +; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.8(0x40000000); %bb.6(50.00%), %bb.8(50.00%) +; 
EMPTY: +; CHECK: [[Reg317:%[0-9]+]]:vreg_64 = PHI [[Reg23]]:vreg_64, %bb.4, undef [[Reg318:%[0-9]+]]:vreg_64, %bb.7 +; CHECK-NEXT: [[Reg319:%[0-9]+]]:vgpr_32 = PHI [[Reg311]]:vgpr_32, %bb.4, undef [[Reg320:%[0-9]+]]:vgpr_32, %bb.7 +; CHECK-NEXT: [[Reg321:%[0-9]+]]:sreg_32 = SI_ELSE killed [[Reg316]]:sreg_32, %bb.8, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.6 +; EMPTY: +; CHECK: bb.6.bb4: +; CHECK-NEXT: ; predecessors: %bb.5 +; CHECK-NEXT: successors: %bb.8(0x80000000); %bb.8(100.00%) +; EMPTY: +; CHECK: GLOBAL_STORE_DWORD killed [[Reg317]]:vreg_64, killed [[Reg319]]:vgpr_32, 0, 0, implicit $exec :: (store (s32) into %ir.p4, addrspace 1) +; CHECK-NEXT: S_BRANCH %bb.8 +; EMPTY: +; CHECK: bb.7.bb5: +; CHECK-NEXT: ; predecessors: %bb.4 +; CHECK-NEXT: successors: %bb.5(0x80000000); %bb.5(100.00%) +; EMPTY: +; CHECK: GLOBAL_STORE_DWORD killed [[Reg22]]:vreg_64, killed [[Reg312]]:vgpr_32, 0, 0, implicit $exec :: (store (s32) into %ir.p5, addrspace 1) +; CHECK-NEXT: S_BRANCH %bb.5 +; EMPTY: +; CHECK: bb.8.bb6: +; CHECK-NEXT: ; predecessors: %bb.5, %bb.6 +; CHECK-NEXT: successors: %bb.11(0x40000000), %bb.9(0x40000000); %bb.11(50.00%), %bb.9(50.00%) +; EMPTY: +; CHECK: SI_END_CF killed [[Reg321]]:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: [[Reg322:%[0-9]+]]:sreg_32 = V_CMP_GT_U32_e64 killed [[Reg28]]:vgpr_32, killed [[Reg305]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg323:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; CHECK-NEXT: [[Reg324:%[0-9]+]]:sreg_32 = SI_IF killed [[Reg322]]:sreg_32, %bb.9, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.11 +; EMPTY: +; CHECK: bb.9.Flow: +; CHECK-NEXT: ; predecessors: %bb.8, %bb.11 +; CHECK-NEXT: successors: %bb.10(0x40000000), %bb.12(0x40000000); %bb.10(50.00%), %bb.12(50.00%) +; EMPTY: +; CHECK: [[Reg325:%[0-9]+]]:sreg_32 = PHI [[Reg323]]:sreg_32, %bb.8, [[Reg326:%[0-9]+]]:sreg_32, %bb.11 +; CHECK-NEXT: 
[[Reg327:%[0-9]+]]:vgpr_32 = PHI [[Reg10]]:vgpr_32, %bb.8, undef [[Reg328:%[0-9]+]]:vgpr_32, %bb.11 +; CHECK-NEXT: [[Reg329:%[0-9]+]]:vgpr_32 = PHI [[Reg11]]:vgpr_32, %bb.8, undef [[Reg330:%[0-9]+]]:vgpr_32, %bb.11 +; CHECK-NEXT: [[Reg331:%[0-9]+]]:vgpr_32 = PHI [[Reg310]]:vgpr_32, %bb.8, undef [[Reg332:%[0-9]+]]:vgpr_32, %bb.11 +; CHECK-NEXT: [[Reg333:%[0-9]+]]:vreg_64 = PHI [[Reg248]]:vreg_64, %bb.8, undef [[Reg334:%[0-9]+]]:vreg_64, %bb.11 +; CHECK-NEXT: [[Reg335:%[0-9]+]]:sreg_32 = SI_ELSE killed [[Reg324]]:sreg_32, %bb.12, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.10 +; EMPTY: +; CHECK: bb.10.bb7: +; CHECK-NEXT: ; predecessors: %bb.9 +; CHECK-NEXT: successors: %bb.12(0x80000000); %bb.12(100.00%) +; EMPTY: +; CHECK: [[Reg336:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg327]]:vgpr_32, %subreg.sub0, killed [[Reg329]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg337:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[Reg331]]:vgpr_32, killed [[Reg333]].sub0:vreg_64, 0, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_DWORD killed [[Reg336]]:vreg_64, killed [[Reg337]]:vgpr_32, 0, 0, implicit $exec :: (store (s32) into %ir.p6, addrspace 1) +; CHECK-NEXT: [[Reg338:%[0-9]+]]:sreg_32 = S_OR_B32 killed [[Reg325]]:sreg_32, $exec_lo, implicit-def dead $scc +; CHECK-NEXT: S_BRANCH %bb.12 +; EMPTY: +; CHECK: bb.11.bb8: +; CHECK-NEXT: ; predecessors: %bb.8 +; CHECK-NEXT: successors: %bb.9(0x80000000); %bb.9(100.00%) +; EMPTY: +; CHECK: [[Reg339:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg12]]:vgpr_32, %subreg.sub0, killed [[Reg13]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg340:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[Reg315]]:vgpr_32, killed [[Reg176]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_DWORD killed [[Reg339]]:vreg_64, killed [[Reg340]]:vgpr_32, 0, 0, implicit $exec :: (store (s32) into %ir.p7, addrspace 1) +; CHECK-NEXT: [[Reg341:%[0-9]+]]:sreg_32 = S_AND_B32 killed [[Reg313]]:sreg_32, $exec_lo, implicit-def dead $scc 
+; CHECK-NEXT: [[Reg326]]:sreg_32 = COPY killed [[Reg341]]:sreg_32 +; CHECK-NEXT: S_BRANCH %bb.9 +; EMPTY: +; CHECK: bb.12.Flow4: +; CHECK-NEXT: ; predecessors: %bb.9, %bb.10 +; CHECK-NEXT: successors: %bb.13(0x40000000), %bb.14(0x40000000); %bb.13(50.00%), %bb.14(50.00%) +; EMPTY: +; CHECK: [[Reg342:%[0-9]+]]:sreg_32 = PHI [[Reg325]]:sreg_32, %bb.9, [[Reg338]]:sreg_32, %bb.10 +; CHECK-NEXT: SI_END_CF killed [[Reg335]]:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: [[Reg343:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg18]]:vgpr_32, %subreg.sub0, killed [[Reg19]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg344:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg16]]:vgpr_32, %subreg.sub0, killed [[Reg17]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg345:%[0-9]+]]:sreg_32 = SI_IF killed [[Reg342]]:sreg_32, %bb.14, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.13 +; EMPTY: +; CHECK: bb.13.bb9: +; CHECK-NEXT: ; predecessors: %bb.12 +; CHECK-NEXT: successors: %bb.14(0x80000000); %bb.14(100.00%) +; EMPTY: +; CHECK: [[Reg346:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[Reg110]]:vgpr_32, [[Reg46]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_DWORD killed [[Reg21]]:vreg_64, killed [[Reg346]]:vgpr_32, 0, 0, implicit $exec :: (store (s32) into %ir.p1, addrspace 1) +; EMPTY: +; CHECK: bb.14.bb10: +; CHECK-NEXT: ; predecessors: %bb.12, %bb.13 +; EMPTY: +; CHECK: SI_END_CF killed [[Reg345]]:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: [[Reg347:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg343]]:vreg_64, 0, 0, implicit $exec :: (load (s8) from %ir.p10, addrspace 1) +; CHECK-NEXT: [[Reg348:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg343]]:vreg_64, 1, 0, implicit $exec :: (load (s8) from %ir.p10 + 1, addrspace 1) +; CHECK-NEXT: [[Reg349:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg348]]:vgpr_32, 8, killed [[Reg347]]:vgpr_32, implicit $exec +; CHECK-NEXT: 
[[Reg350:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg343]]:vreg_64, 2, 0, implicit $exec :: (load (s8) from %ir.p10 + 2, addrspace 1) +; CHECK-NEXT: [[Reg351:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE killed [[Reg343]]:vreg_64, 3, 0, implicit $exec :: (load (s8) from %ir.p10 + 3, addrspace 1) +; CHECK-NEXT: [[Reg352:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg351]]:vgpr_32, 8, killed [[Reg350]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg353:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg352]]:vgpr_32, 16, killed [[Reg349]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg354:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[Reg353]]:vgpr_32, killed [[Reg46]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_DWORD killed [[Reg344]]:vreg_64, killed [[Reg354]]:vgpr_32, 0, 0, implicit $exec :: (store (s32) into %ir.p9, addrspace 1) +; CHECK-NEXT: S_ENDPGM 0 +; EMPTY: +; CHECK: # End machine code for function test. +; EMPTY: +; CHECK: Next-use distance of Register [[Reg19]] = 310.0 +; CHECK-NEXT: Next-use distance of Register [[Reg18]] = 309.0 +; CHECK-NEXT: Next-use distance of Register [[Reg17]] = 309.0 +; CHECK-NEXT: Next-use distance of Register [[Reg16]] = 308.0 +; CHECK-NEXT: Next-use distance of Register [[Reg15]] = 15.0 +; CHECK-NEXT: Next-use distance of Register [[Reg14]] = 14.0 +; CHECK-NEXT: Next-use distance of Register [[Reg13]] = 295.0 +; CHECK-NEXT: Next-use distance of Register [[Reg12]] = 294.0 +; CHECK-NEXT: Next-use distance of Register [[Reg11]] = 295.0 +; CHECK-NEXT: Next-use distance of Register [[Reg10]] = 293.0 +; CHECK-NEXT: Next-use distance of Register [[Reg9]] = 11.0 +; CHECK-NEXT: Next-use distance of Register [[Reg8]] = 10.0 +; CHECK-NEXT: Next-use distance of Register [[Reg7]] = 10.0 +; CHECK-NEXT: Next-use distance of Register [[Reg6]] = 9.0 +; CHECK-NEXT: Next-use distance of Register [[Reg5]] = 28.0 +; CHECK-NEXT: Next-use distance of Register [[Reg4]] = 27.0 +; CHECK-NEXT: Next-use distance of Register [[Reg3]] = 23.0 +; CHECK-NEXT: 
Next-use distance of Register [[Reg2]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg1]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg20]] = 27.0 +; CHECK-NEXT: Next-use distance of Register [[Reg21]] = 7.0 +; CHECK-NEXT: Next-use distance of Register [[Reg22]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg23]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg24]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg25]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg26]] = 9.0 +; CHECK-NEXT: Next-use distance of Register [[Reg27]] = 8.0 +; CHECK-NEXT: Next-use distance of Register [[Reg28]] = 25.0 +; CHECK-NEXT: Next-use distance of Register [[Reg29]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg30]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg31]] = 13.0 +; CHECK-NEXT: Next-use distance of Register [[Reg32]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg33]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg34]] = 10.0 +; CHECK-NEXT: Next-use distance of Register [[Reg35]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg36]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg37]] = 7.0 +; CHECK-NEXT: Next-use distance of Register [[Reg38]] = 10.0 +; CHECK-NEXT: Next-use distance of Register [[Reg41]] = 8.0 +; CHECK-NEXT: Next-use distance of Register [[Reg43]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg45]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg46]] = 233.0 +; CHECK-NEXT: Next-use distance of Register [[Reg47]] = 6.0 +; CHECK-NEXT: Next-use distance of Register [[Reg48]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg49]] = 11.0 +; CHECK-NEXT: Next-use distance of Register [[Reg50]] = 7.0 +; CHECK-NEXT: Next-use distance of Register [[Reg51]] = 8.0 +; CHECK-NEXT: Next-use distance of Register [[Reg52]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg53]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg54]] = 
4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg55]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg56]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg57]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg58]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg59]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg60]] = 16.0 +; CHECK-NEXT: Next-use distance of Register [[Reg61]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg62]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg63]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg64]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg65]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg66]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg42]] = 9.0 +; CHECK-NEXT: Next-use distance of Register [[Reg67]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg68]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg69]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg70]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg71]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg72]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg40]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg73]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg75]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg44]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg76]] = 6.0 +; CHECK-NEXT: Next-use distance of Register [[Reg77]] = 6.0 +; CHECK-NEXT: Next-use distance of Register [[Reg78]] = 7.0 +; CHECK-NEXT: Next-use distance of Register [[Reg79]] = 7.0 +; CHECK-NEXT: Next-use distance of Register [[Reg80]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg82]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg83]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg85]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg86]] = 133.0 +; CHECK-NEXT: Next-use distance of 
Register [[Reg87]] = 133.0 +; CHECK-NEXT: Next-use distance of Register [[Reg88]] = 39.0 +; CHECK-NEXT: Next-use distance of Register [[Reg89]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg90]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg91]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg92]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg93]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg94]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg95]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg96]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg97]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg98]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg99]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg100]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg101]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg102]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg103]] = 211.0 +; CHECK-NEXT: Next-use distance of Register [[Reg104]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg105]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg106]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg107]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg108]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg109]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg110]] = 203.0 +; CHECK-NEXT: Next-use distance of Register [[Reg111]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg112]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg113]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg114]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg115]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg116]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg117]] = 23.0 +; CHECK-NEXT: Next-use distance of Register [[Reg118]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg119]] 
= 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg120]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg121]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg122]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg123]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg124]] = 7.0 +; CHECK-NEXT: Next-use distance of Register [[Reg125]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg126]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg127]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg128]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg129]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg130]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg131]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg132]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg133]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg134]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg135]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg136]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg137]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg138]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg139]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg140]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg141]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg142]] = 162.0 +; CHECK-NEXT: Next-use distance of Register [[Reg143]] = 160.0 +; CHECK-NEXT: Next-use distance of Register [[Reg144]] = 159.0 +; CHECK-NEXT: Next-use distance of Register [[Reg145]] = 157.0 +; CHECK-NEXT: Next-use distance of Register [[Reg146]] = 137.0 +; CHECK-NEXT: Next-use distance of Register [[Reg147]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg148]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg149]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg150]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg151]] = 
128.0 +; CHECK-NEXT: Next-use distance of Register [[Reg152]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg153]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg154]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg155]] = 123.0 +; CHECK-NEXT: Next-use distance of Register [[Reg156]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg157]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg158]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg159]] = 87.0 +; CHECK-NEXT: Next-use distance of Register [[Reg160]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg161]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg162]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg163]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg164]] = 112.0 +; CHECK-NEXT: Next-use distance of Register [[Reg165]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg166]] = 109.0 +; CHECK-NEXT: Next-use distance of Register [[Reg167]] = 107.0 +; CHECK-NEXT: Next-use distance of Register [[Reg168]] = 105.0 +; CHECK-NEXT: Next-use distance of Register [[Reg169]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg170]] = 84.0 +; CHECK-NEXT: Next-use distance of Register [[Reg171]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg172]] = 81.0 +; CHECK-NEXT: Next-use distance of Register [[Reg173]] = 79.0 +; CHECK-NEXT: Next-use distance of Register [[Reg174]] = 42.0 +; CHECK-NEXT: Next-use distance of Register [[Reg175]] = 38.0 +; CHECK-NEXT: Next-use distance of Register [[Reg176]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg177]] = 17.0 +; CHECK-NEXT: Next-use distance of Register [[Reg178]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg179]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg180]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg181]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg182]] = 1.0 +; CHECK-NEXT: Next-use distance of Register 
[[Reg183]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg184]] = 9.0 +; CHECK-NEXT: Next-use distance of Register [[Reg185]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg186]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg187]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg188]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg189]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg190]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg191]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg192]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg193]] = 9.0 +; CHECK-NEXT: Next-use distance of Register [[Reg194]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg195]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg196]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg197]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg198]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg199]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg200]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg201]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg202]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg203]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg204]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg205]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg206]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg207]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg208]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg209]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg210]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg211]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg212]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg213]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg214]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg215]] = 
1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg216]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg217]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg219]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg220]] = 9.0 +; CHECK-NEXT: Next-use distance of Register [[Reg221]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg222]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg223]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg224]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg225]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg226]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg227]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg228]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg229]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg230]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg231]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg232]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg233]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg234]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg235]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg236]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg237]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg238]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg239]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg240]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg241]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg242]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg243]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg245]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg246]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg248]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg249]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg250]] = 1.0 +; 
CHECK-NEXT: Next-use distance of Register [[Reg252]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg253]] = 9.0 +; CHECK-NEXT: Next-use distance of Register [[Reg254]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg255]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg256]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg257]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg258]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg259]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg260]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg261]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg262]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg263]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg264]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg265]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg266]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg267]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg268]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg269]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg270]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg271]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg272]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg273]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg274]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg275]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg276]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg277]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg278]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg279]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg280]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg281]] = 9.0 +; CHECK-NEXT: Next-use distance of Register [[Reg282]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg283]] = 1.0 +; CHECK-NEXT: 
Next-use distance of Register [[Reg284]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg285]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg286]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg287]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg288]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg289]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg290]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg291]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg292]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg293]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg294]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg295]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg296]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg297]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg298]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg299]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg300]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg301]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg302]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg303]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg304]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg305]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg306]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg307]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg308]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg309]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg310]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg311]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg312]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg313]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg314]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg315]] = 1.0 +; CHECK-NEXT: Next-use 
distance of Register [[Reg316]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg317]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg319]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg321]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg322]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg323]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg324]] = 7.0 +; CHECK-NEXT: Next-use distance of Register [[Reg325]] = 7.0 +; CHECK-NEXT: Next-use distance of Register [[Reg327]] = 6.0 +; CHECK-NEXT: Next-use distance of Register [[Reg329]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg331]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg333]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg335]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg336]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg337]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg338]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg339]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg340]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg341]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg326]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg342]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg343]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg344]] = 12.0 +; CHECK-NEXT: Next-use distance of Register [[Reg345]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg346]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg347]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg348]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg349]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg350]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg351]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg352]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg353]] = 1.0 +; CHECK-NEXT: Next-use distance of 
Register [[Reg354]] = 1.0 +entry: + %ld = load i32, ptr addrspace(1) %p4, align 2 + %ld0 = load i32, ptr addrspace(1) %p5, align 8 + %ld1 = load i32, ptr addrspace(1) %p1, align 4 + %gep1 = getelementptr inbounds i32, ptr addrspace(1) %p1, i64 3 + %ld2 = load i32, ptr addrspace(1) %gep1, align 1 + %tmp1 = add i32 %ld, %ld0 + store i32 %tmp1, ptr addrspace(1) %p1 + %cond1 = icmp uge i32 %ld0, %tmp1 + br i1 %cond1, label %bb1, label %bb2 + +bb1: + %load1 = load i32, ptr addrspace(1) %p3, align 4 + %load2 = load <8 x i32>, ptr addrspace(1) %p3, align 1 + %extract1 = extractelement < 8 x i32> %load2, i32 1 + %extract2 = extractelement < 8 x i32> %load2, i32 2 + %tmp84 = add i32 %load1, %extract1 + %tmp85 = mul i32 %tmp84, %extract2 + store i32 %tmp85, ptr addrspace(1) %p3 + store i32 %tmp84, ptr addrspace(1) %p8 + br label %bb3 + +bb2: + %ld3 = load i32, ptr addrspace(3) %p2, align 1 + %load4 = load <8 x i32>, ptr addrspace(1) %p8, align 1 + %extract11 = extractelement < 8 x i32> %load4, i32 1 + %extract12 = extractelement < 8 x i32> %load4, i32 2 + %tmp70 = mul i32 %ld3, %extract11 + %tmp71 = add i32 %tmp70, %extract12 + %tmp72 = sub i32 %tmp71, %ld1 + br label %bb3 + +bb3: + %phi1 = phi i32 [ %load1, %bb1 ], [ %tmp72, %bb2 ] + %phi2 = phi i32 [ %load1, %bb1 ], [ %tmp1, %bb2 ] + %phi3 = phi i32 [ %extract1, %bb1 ], [ %extract11, %bb2 ] + %phi4 = phi i32 [ %extract2, %bb1 ], [ %extract12, %bb2 ] + %tmp73 = mul i32 %phi1, %ld1 + %tmp74 = add i32 %tmp73, %ld + %tmp75 = mul i32 %tmp74, %tmp1 + %tmp76 = add i32 %tmp75, %phi2 + %tmp77 = sub i32 %tmp76, %phi3 + %tmp78 = mul i32 %tmp76, %phi4 + %tmp2 = mul i32 %phi1, %phi3 + %idx10 = getelementptr inbounds [5 x i32], [5 x i32]* @array2, i64 1, i64 0 + %val0 = load i32, i32* %idx10, align 4 + %tmp3 = add i32 %val0, %phi4 + %idx20 = getelementptr inbounds [5 x i32], [5 x i32]* @array4, i64 0, i64 1 + store i32 %tmp3, i32 *%idx20 + %load22 = load <8 x i32>, ptr addrspace(1) %p3, align 1 + %extract3 = extractelement < 8 x i32> 
%load22, i32 6 + %idx12 = getelementptr inbounds [5 x i32], [5 x i32]* @array2, i64 1, i64 2 + %val2 = load i32, i32* %idx12, align 4 + %tmp4 = mul i32 %val2, %tmp2 + %tmp5= add i32 %tmp3, %phi3 + %tmp6 = mul i32 %phi4, %val0 + %tmp7 = sub i32 %tmp6, %tmp4 + %tmp8 = mul i32 %tmp5, %tmp7 + %tmp9 = add i32 %tmp8, %extract3 + %idx22 = getelementptr inbounds [5 x i32], [5 x i32]* @array3, i64 3, i64 2 + store i32 %tmp9, i32 *%idx22 + %extract4 = extractelement < 8 x i32> %load22, i32 7 + %idx13 = getelementptr inbounds [5 x i32], [5 x i32]* @array5, i64 1, i64 0 + %val3 = load i32, i32* %idx13, align 4 + %tmp10 = mul i32 %val3, %extract4 + %tmp11 = add i32 %tmp10, %phi2 + %tmp12 = sub i32 %val0, %extract4 + %tmp13 = mul i32 %phi4, %phi3 + %tmp14 = add i32 %tmp11, %tmp5 + %tmp15 = add i32 %tmp10, %tmp8 + %tmp16 = sub i32 %extract4, %phi4 + %tmp17 = add i32 %tmp12, %val2 + %tmp18 = add i32 %val0, %tmp9 + %idx601 = getelementptr inbounds [5 x i32], [5 x i32]* @array6, i64 2, i64 1 + %val601 = load i32, i32* %idx601, align 1 + %tmp19 = mul i32 %val601, %tmp12 + %idx701 = getelementptr inbounds [5 x i32], [5 x i32]* @array7, i64 1, i64 0 + %val701 = load i32, i32* %idx701, align 2 + %tmp20 = sub i32 %val701, %tmp11 + %idx801 = getelementptr inbounds [5 x i32], [5 x i32]* @array8, i64 2, i64 1 + %val801 = load i32, i32* %idx801, align 8 + %tmp21 = add i32 %val801, %tmp10 + %idx901 = getelementptr inbounds [5 x i32], [5 x i32]* @array9, i64 1, i64 1 + %val901 = load i32, i32* %idx901, align 1 + %tmp22 = mul i32 %val901, %tmp9 + %idx602 = getelementptr inbounds [5 x i32], [5 x i32]* @array2, i64 4, i64 1 + %val602 = load i32, i32* %idx602, align 1 + %tmp23 = add i32 %val602, %tmp8 + %idx702 = getelementptr inbounds [5 x i32], [5 x i32]* @array3, i64 4, i64 0 + %val702 = load i32, i32* %idx702, align 2 + %tmp24 = sub i32 %val702, %tmp7 + %idx802 = getelementptr inbounds [5 x i32], [5 x i32]* @array4, i64 4, i64 0 + %val802 = load i32, i32* %idx802, align 8 + %tmp25 = add i32 
%val802, %tmp6 + %idx902 = getelementptr inbounds [5 x i32], [5 x i32]* @array5, i64 4, i64 2 + %val902 = load i32, i32* %idx902, align 1 + %tmp26 = mul i32 %val902, %tmp5 + %idx800 = getelementptr inbounds [5 x i32], [5 x i32]* @array8, i64 1, i64 0 + %val800 = load i32, i32* %idx800, align 4 + %tmp27 = add i32 %val800, %tmp4 + %idx15 = getelementptr inbounds [5 x i32], [5 x i32]* @array4, i64 0, i64 2 + %val5 = load i32, i32* %idx15, align 4 + %tmp28 = mul i32 %val5, %tmp3 + %idx16 = getelementptr inbounds [5 x i32], [5 x i32]* @array2, i64 0, i64 3 + %val6 = load i32, i32* %idx16, align 4 + %tmp206 = add i32 %val6, %tmp9 + %idx17 = getelementptr inbounds [5 x i32], [5 x i32]* @array3, i64 0, i64 1 + %val7 = load i32, i32* %idx17, align 4 + %tmp207 = add i32 %val7, %tmp3 + %idx18 = getelementptr inbounds [5 x i32], [5 x i32]* @array5, i64 0, i64 1 + %val8 = load i32, i32* %idx18, align 4 + %tmp208 = mul i32 %val8, %tmp10 + %load3 = load <8 x i32>, ptr addrspace(1) %p4, align 1 + %extract7 = extractelement < 8 x i32> %load3, i32 4 + %tmp209 = add i32 %extract7, %tmp11 + %extract8 = extractelement < 8 x i32> %load3, i32 3 + %tmp30 = mul i32 %extract8, %tmp12 + %tmp31 = add i32 %tmp30, %tmp209 + %tmp32 = udiv i32 %tmp31, %tmp208 + %tmp33 = add i32 %tmp32, %tmp207 + %tmp34 = mul i32 %tmp33, %val902 + %tmp35 = sub i32 %tmp34, %val901 + %tmp36 = add i32 %tmp35, %tmp206 + %tmp37 = mul i32 %tmp36, %tmp78 + %tmp38 = add i32 %tmp37, %tmp77 + %tmp39 = sub i32 %tmp38, %extract3 + %tmp40 = udiv i32 %tmp39, %extract4 + store i32 %tmp40, ptr addrspace(3) %p2, align 1 + %tmp41 = sub i32 %tmp40, %val800 + %tmp42 = mul i32 %tmp41, %extract7 + %tmp43 = add i32 %tmp42, %val801 + %tmp44 = mul i32 %tmp43, %val2 + %tmp45 = add i32 %tmp44, %val0 + %tmp46 = sub i32 %tmp45, %tmp2 + %tmp47 = add i32 %tmp46, %tmp28 + %tmp48 = mul i32 %tmp47, %tmp27 + %tmp49 = udiv i32 %tmp48, %tmp26 + %tmp50 = add i32 %tmp49, %tmp25 + %tmp51 = sub i32 %tmp50, %tmp24 + %tmp52 = add i32 %tmp51, %tmp23 + 
%tmp53 = mul i32 %tmp52, %tmp22 + %tmp54 = add i32 %tmp53, %tmp21 + %tmp55 = sub i32 %tmp54, %tmp20 + %tmp56 = add i32 %tmp55, %tmp19 + %tmp57 = mul i32 %tmp56, %tmp3 + %idx700 = getelementptr inbounds [5 x i32], [5 x i32]* @array7, i64 3, i64 2 + store i32 %tmp57, i32 *%idx700 + %tmp58 = add i32 %tmp57, %tmp18 + %tmp59 = udiv i32 %tmp58, %tmp17 + %tmp60 = mul i32 %tmp59, %tmp16 + %tmp61 = add i32 %tmp60, %tmp15 + %tmp62 = add i32 %tmp61, %tmp14 + %tmp63 = mul i32 %tmp62, %tmp13 + %tmp64 = mul i32 %tmp63, %ld2 + %idx23 = getelementptr inbounds [5 x i32], [5 x i32]* @array4, i64 3, i64 4 + store i32 %tmp64, i32 *%idx23 + %extract17 = extractelement < 8 x i32> %load3, i32 4 + %idx14 = getelementptr inbounds [5 x i32], [5 x i32]* @array3, i64 4, i64 1 + %val4 = load i32, i32* %idx14, align 4 + %tmp65 = add i32 %val4, %extract17 + %tmp66 = sub i32 %tmp65, %tmp2 + %idx24 = getelementptr inbounds [5 x i32], [5 x i32]* @array4, i64 3, i64 5 + store i32 %tmp66, i32 *%idx24 + %extract9 = extractelement < 8 x i32> %load22, i32 0 + %idx600 = getelementptr inbounds [5 x i32], [5 x i32]* @array6, i64 1, i64 2 + %val600 = load i32, i32* %idx600, align 4 + %tmp67 = add i32 %val600, %extract9 + %extract10 = extractelement < 8 x i32> %load22, i32 5 + %tmp68 = sub i32 %extract10, %tmp3 + %tmp69 = add i32 %tmp67, %tmp68 + %tmp79 = mul i32 %tmp69, %tmp13 + store i32 %tmp79, ptr addrspace(1) %p8, align 2 + %cond2 = icmp ult i32 %tmp69, %tmp68 + br i1 %cond2, label %bb4, label %bb5 + +bb4: + store i32 %tmp68, ptr addrspace(1) %p4 + br label %bb6 + +bb5: + store i32 %tmp69, ptr addrspace(1) %p5 + br label %bb6 + +bb6: + %tmp80 = mul i32 %tmp66, %ld2 + %cond3 = icmp ule i32 %ld1, %val4 + br i1 %cond3, label %bb7, label %bb8 + +bb7: + %tmp81 = add i32 %tmp67, %tmp45 + store i32 %tmp81, ptr addrspace(1) %p6 + br label %bb9 + +bb8: + %tmp82 = add i32 %tmp79, %val8 + store i32 %tmp82, ptr addrspace(1) %p7 + %xor = xor i1 %cond2, %cond3 + br i1 %xor, label %bb9, label %bb10 + +bb9: + %phi5 = 
phi i32 [ %tmp81, %bb7], [%tmp82, %bb8] + %tmp83 = add i32 %extract9, %ld2 + store i32 %tmp83, ptr addrspace(1) %p1 + br label %bb10 + +bb10: + %ld10 = load i32, ptr addrspace(1) %p10, align 1 + %tmp90 = add i32 %ld10, %ld2 + store i32 %tmp90, ptr addrspace(1) %p9, align 4 + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/test_ers_multiple_spills2.ll b/llvm/test/CodeGen/AMDGPU/test_ers_multiple_spills2.ll new file mode 100644 index 0000000000000..f86e9b2fe2d5d --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/test_ers_multiple_spills2.ll @@ -0,0 +1,1029 @@ +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -enable-next-use-analysis=true -verify-machineinstrs -dump-next-use-distance < %s 2>&1 | FileCheck %s + +@array2 = global [5 x i32] zeroinitializer, align 4 +@array3 = global [5 x i32] zeroinitializer, align 4 +@array4 = global [5 x i32] zeroinitializer, align 4 +@array5 = global [5 x i32] zeroinitializer, align 4 + +@array6 = global [5 x i32] zeroinitializer, align 4 +@array7 = global [5 x i32] zeroinitializer, align 4 +@array8 = global [5 x i32] zeroinitializer, align 4 +@array9 = global [5 x i32] zeroinitializer, align 4 + +; +; bb.0.entry +; / | +; bb.1.bb1 | +; \ | +; bb.2.bb2 +; / | +; bb.5.bb8 | +; \ | +; bb.3.Flow +; / | +; bb.4.bb7 | +; \ | +; bb.6.Flow1 +; / | +; bb.7.bb9 | +; \ | +; bb.8.bb10 +; +define amdgpu_ps void @test(ptr addrspace(1) %p1, ptr addrspace(3) %p2, ptr addrspace(1) %p3, ptr addrspace(1) %p4, ptr addrspace(1) %p5, ptr addrspace(1) %p6, ptr addrspace(1) %p7, ptr addrspace(1) %p8, ptr addrspace(1) %p9, ptr addrspace(1) %p10, ptr addrspace(1) %p11, i32 %arg1, i32 %arg2) { +; CHECK-LABEL: # Machine code for function test: IsSSA, TracksLiveness +; CHECK-NEXT: Function Live Ins: $vgpr0 in [[Reg1:%[0-9]+]], $vgpr1 in [[Reg2:%[0-9]+]], $vgpr2 in [[Reg3:%[0-9]+]], $vgpr3 in [[Reg4:%[0-9]+]], $vgpr4 in [[Reg5:%[0-9]+]], $vgpr5 in [[Reg6:%[0-9]+]], $vgpr6 in [[Reg7:%[0-9]+]], $vgpr7 in [[Reg8:%[0-9]+]], $vgpr8 in [[Reg9:%[0-9]+]], $vgpr9 in 
[[Reg10:%[0-9]+]], $vgpr10 in [[Reg11:%[0-9]+]], $vgpr11 in [[Reg12:%[0-9]+]], $vgpr12 in [[Reg13:%[0-9]+]], $vgpr13 in [[Reg14:%[0-9]+]], $vgpr14 in [[Reg15:%[0-9]+]], $vgpr15 in [[Reg16:%[0-9]+]], $vgpr16 in [[Reg17:%[0-9]+]], $vgpr17 in [[Reg18:%[0-9]+]], $vgpr18 in [[Reg19:%[0-9]+]], $vgpr19 in [[Reg20:%[0-9]+]], $vgpr20 in [[Reg21:%[0-9]+]], $vgpr21 in [[Reg22:%[0-9]+]] +; EMPTY: +; CHECK: bb.0.entry: +; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000); %bb.1(50.00%), %bb.2(50.00%) +; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21 +; CHECK-NEXT: [[Reg22]]:vgpr_32 = COPY killed $vgpr21 +; CHECK-NEXT: [[Reg21]]:vgpr_32 = COPY killed $vgpr20 +; CHECK-NEXT: [[Reg20]]:vgpr_32 = COPY killed $vgpr19 +; CHECK-NEXT: [[Reg19]]:vgpr_32 = COPY killed $vgpr18 +; CHECK-NEXT: [[Reg18]]:vgpr_32 = COPY killed $vgpr17 +; CHECK-NEXT: [[Reg17]]:vgpr_32 = COPY killed $vgpr16 +; CHECK-NEXT: [[Reg16]]:vgpr_32 = COPY killed $vgpr15 +; CHECK-NEXT: [[Reg15]]:vgpr_32 = COPY killed $vgpr14 +; CHECK-NEXT: [[Reg14]]:vgpr_32 = COPY killed $vgpr13 +; CHECK-NEXT: [[Reg13]]:vgpr_32 = COPY killed $vgpr12 +; CHECK-NEXT: [[Reg12]]:vgpr_32 = COPY killed $vgpr11 +; CHECK-NEXT: [[Reg11]]:vgpr_32 = COPY killed $vgpr10 +; CHECK-NEXT: [[Reg10]]:vgpr_32 = COPY killed $vgpr9 +; CHECK-NEXT: [[Reg9]]:vgpr_32 = COPY killed $vgpr8 +; CHECK-NEXT: [[Reg8]]:vgpr_32 = COPY killed $vgpr7 +; CHECK-NEXT: [[Reg7]]:vgpr_32 = COPY killed $vgpr6 +; CHECK-NEXT: [[Reg6]]:vgpr_32 = COPY killed $vgpr5 +; CHECK-NEXT: [[Reg5]]:vgpr_32 = COPY killed $vgpr4 +; CHECK-NEXT: [[Reg4]]:vgpr_32 = COPY killed $vgpr3 +; CHECK-NEXT: [[Reg3]]:vgpr_32 = COPY killed $vgpr2 +; CHECK-NEXT: [[Reg2]]:vgpr_32 = COPY killed $vgpr1 +; CHECK-NEXT: [[Reg1]]:vgpr_32 = COPY killed $vgpr0 +; CHECK-NEXT: [[Reg23:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg10]]:vgpr_32, 
%subreg.sub0, killed [[Reg11]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg24:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg12]]:vgpr_32, %subreg.sub0, killed [[Reg13]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg25:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg4]]:vgpr_32, %subreg.sub0, killed [[Reg5]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg26:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg20]]:vgpr_32, %subreg.sub0, killed [[Reg21]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg27:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg18]]:vgpr_32, %subreg.sub0, killed [[Reg19]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg28:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg14]]:vgpr_32, %subreg.sub0, killed [[Reg15]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg29:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg16]]:vgpr_32, %subreg.sub0, killed [[Reg17]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg30:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg1]]:vgpr_32, %subreg.sub0, killed [[Reg2]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg31:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg8]]:vgpr_32, %subreg.sub0, killed [[Reg9]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg32:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg6]]:vgpr_32, %subreg.sub0, killed [[Reg7]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg33:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[Reg32]]:vreg_64, 0, 0, implicit $exec :: (load (s16) from %ir.p4, addrspace 1) +; CHECK-NEXT: [[Reg34:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[Reg32]]:vreg_64, 2, 0, implicit $exec :: (load (s16) from %ir.p4 + 2, addrspace 1) +; CHECK-NEXT: [[Reg35:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg34]]:vgpr_32, 16, killed [[Reg33]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg36:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD killed [[Reg31]]:vreg_64, 0, 0, implicit $exec :: (load (s32) from %ir.p5, align 8, addrspace 1) +; CHECK-NEXT: [[Reg37:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[Reg30]]:vreg_64, 0, 0, implicit $exec :: (load (s32) from %ir.p1, addrspace 1) +; CHECK-NEXT: 
[[Reg38:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg29]]:vreg_64, 0, 0, implicit $exec :: (load (s8) from %ir.p9, addrspace 1) +; CHECK-NEXT: [[Reg39:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg29]]:vreg_64, 1, 0, implicit $exec :: (load (s8) from %ir.p9 + 1, addrspace 1) +; CHECK-NEXT: [[Reg40:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg39]]:vgpr_32, 8, killed [[Reg38]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg41:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg29]]:vreg_64, 2, 0, implicit $exec :: (load (s8) from %ir.p9 + 2, addrspace 1) +; CHECK-NEXT: [[Reg42:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg29]]:vreg_64, 3, 0, implicit $exec :: (load (s8) from %ir.p9 + 3, addrspace 1) +; CHECK-NEXT: [[Reg43:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg42]]:vgpr_32, 8, killed [[Reg41]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg44:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg43]]:vgpr_32, 16, killed [[Reg40]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg45:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[Reg28]]:vreg_64, 0, 0, implicit $exec :: (load (s16) from %ir.p8, addrspace 1) +; CHECK-NEXT: [[Reg46:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[Reg28]]:vreg_64, 2, 0, implicit $exec :: (load (s16) from %ir.p8 + 2, addrspace 1) +; CHECK-NEXT: [[Reg47:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg27]]:vreg_64, 0, 0, implicit $exec :: (load (s8) from %ir.p10, addrspace 1) +; CHECK-NEXT: [[Reg48:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg27]]:vreg_64, 1, 0, implicit $exec :: (load (s8) from %ir.p10 + 1, addrspace 1) +; CHECK-NEXT: [[Reg49:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg27]]:vreg_64, 2, 0, implicit $exec :: (load (s8) from %ir.p10 + 2, addrspace 1) +; CHECK-NEXT: [[Reg50:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg27]]:vreg_64, 3, 0, implicit $exec :: (load (s8) from %ir.p10 + 3, addrspace 1) +; CHECK-NEXT: [[Reg51:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[Reg35]]:vgpr_32, [[Reg36]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_DWORD killed [[Reg26]]:vreg_64, 
killed [[Reg51]]:vgpr_32, 0, 0, implicit $exec :: (store (s32) into %ir.p11, addrspace 1) +; CHECK-NEXT: [[Reg52:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[Reg35]]:vgpr_32, [[Reg36]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_DWORD [[Reg30]]:vreg_64, [[Reg52]]:vgpr_32, 0, 0, implicit $exec :: (store (s32) into %ir.p1, addrspace 1) +; CHECK-NEXT: [[Reg53:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[Reg3]]:vgpr_32, 0, 0, implicit $exec :: (load (s8) from %ir.p2, addrspace 3) +; CHECK-NEXT: [[Reg54:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[Reg3]]:vgpr_32, 1, 0, implicit $exec :: (load (s8) from %ir.p2 + 1, addrspace 3) +; CHECK-NEXT: [[Reg55:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[Reg3]]:vgpr_32, 2, 0, implicit $exec :: (load (s8) from %ir.p2 + 2, addrspace 3) +; CHECK-NEXT: [[Reg56:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[Reg3]]:vgpr_32, 3, 0, implicit $exec :: (load (s8) from %ir.p2 + 3, addrspace 3) +; CHECK-NEXT: [[Reg57:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg54]]:vgpr_32, 8, killed [[Reg53]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg58:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg56]]:vgpr_32, 8, killed [[Reg55]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg59:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg58]]:vgpr_32, 16, killed [[Reg57]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg60:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg25]]:vreg_64, 8, 0, implicit $exec :: (load (s8) from %ir.p3 + 8, addrspace 1) +; CHECK-NEXT: [[Reg61:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg25]]:vreg_64, 9, 0, implicit $exec :: (load (s8) from %ir.p3 + 9, addrspace 1) +; CHECK-NEXT: [[Reg62:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg61]]:vgpr_32, 8, killed [[Reg60]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg63:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg25]]:vreg_64, 10, 0, implicit $exec :: (load (s8) from %ir.p3 + 10, addrspace 1) +; CHECK-NEXT: [[Reg64:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg25]]:vreg_64, 11, 0, implicit $exec :: (load (s8) from %ir.p3 + 11, 
addrspace 1) +; CHECK-NEXT: [[Reg65:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg64]]:vgpr_32, 8, killed [[Reg63]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg66:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg65]]:vgpr_32, 16, killed [[Reg62]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg67:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg25]]:vreg_64, 4, 0, implicit $exec :: (load (s8) from %ir.p3 + 4, addrspace 1) +; CHECK-NEXT: [[Reg68:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg25]]:vreg_64, 5, 0, implicit $exec :: (load (s8) from %ir.p3 + 5, addrspace 1) +; CHECK-NEXT: [[Reg69:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg68]]:vgpr_32, 8, killed [[Reg67]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg70:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg25]]:vreg_64, 6, 0, implicit $exec :: (load (s8) from %ir.p3 + 6, addrspace 1) +; CHECK-NEXT: [[Reg71:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg25]]:vreg_64, 7, 0, implicit $exec :: (load (s8) from %ir.p3 + 7, addrspace 1) +; CHECK-NEXT: [[Reg72:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg71]]:vgpr_32, 8, killed [[Reg70]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg73:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg72]]:vgpr_32, 16, killed [[Reg69]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg74:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg25]]:vreg_64, 0, 0, implicit $exec :: (load (s8) from %ir.p3, addrspace 1) +; CHECK-NEXT: [[Reg75:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg25]]:vreg_64, 1, 0, implicit $exec :: (load (s8) from %ir.p3 + 1, addrspace 1) +; CHECK-NEXT: [[Reg76:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg75]]:vgpr_32, 8, killed [[Reg74]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg77:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg25]]:vreg_64, 2, 0, implicit $exec :: (load (s8) from %ir.p3 + 2, addrspace 1) +; CHECK-NEXT: [[Reg78:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg25]]:vreg_64, 3, 0, implicit $exec :: (load (s8) from %ir.p3 + 3, addrspace 1) +; CHECK-NEXT: [[Reg79:%[0-9]+]]:vgpr_32 = 
V_LSHL_OR_B32_e64 killed [[Reg78]]:vgpr_32, 8, killed [[Reg77]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg80:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg79]]:vgpr_32, 16, killed [[Reg76]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg81:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[Reg59]]:vgpr_32, killed [[Reg80]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg82:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[Reg81]]:vgpr_32, killed [[Reg73]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_DWORD [[Reg25]]:vreg_64, [[Reg82]]:vgpr_32, 0, 0, implicit $exec :: (store (s32) into %ir.p3, addrspace 1) +; CHECK-NEXT: [[Reg83:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[Reg29]]:vreg_64, 0, 0, implicit $exec :: (load (s32) from %ir.p9, addrspace 1) +; CHECK-NEXT: GLOBAL_STORE_DWORD [[Reg24]]:vreg_64, [[Reg59]]:vgpr_32, 0, 0, implicit $exec :: (store (s32) into %ir.p7, addrspace 1) +; CHECK-NEXT: GLOBAL_STORE_DWORD [[Reg29]]:vreg_64, killed [[Reg81]]:vgpr_32, 0, 0, implicit $exec :: (store (s32) into %ir.p9, addrspace 1) +; CHECK-NEXT: [[Reg84:%[0-9]+]]:sreg_32 = V_CMP_GE_U32_e64 [[Reg36]]:vgpr_32, killed [[Reg22]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg85:%[0-9]+]]:sreg_32 = SI_IF [[Reg84]]:sreg_32, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.1 +; EMPTY: +; CHECK: bb.1.bb1: +; CHECK-NEXT: ; predecessors: %bb.0 +; CHECK-NEXT: successors: %bb.2(0x80000000); %bb.2(100.00%) +; EMPTY: +; CHECK: [[Reg86:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg46]]:vgpr_32, 16, killed [[Reg45]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg87:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD killed [[Reg25]]:vreg_64, 0, 0, implicit $exec :: (load (s32) from %ir.p3, addrspace 1) +; CHECK-NEXT: [[Reg88:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg27]]:vreg_64, 8, 0, implicit $exec :: (load (s8) from %ir.p10 + 8, addrspace 1) +; CHECK-NEXT: [[Reg89:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg27]]:vreg_64, 9, 0, implicit $exec :: (load (s8) from %ir.p10 + 9, 
addrspace 1) +; CHECK-NEXT: [[Reg90:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg89]]:vgpr_32, 8, killed [[Reg88]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg91:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg27]]:vreg_64, 10, 0, implicit $exec :: (load (s8) from %ir.p10 + 10, addrspace 1) +; CHECK-NEXT: [[Reg92:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg27]]:vreg_64, 11, 0, implicit $exec :: (load (s8) from %ir.p10 + 11, addrspace 1) +; CHECK-NEXT: [[Reg93:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg92]]:vgpr_32, 8, killed [[Reg91]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg94:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg93]]:vgpr_32, 16, killed [[Reg90]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg95:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg27]]:vreg_64, 4, 0, implicit $exec :: (load (s8) from %ir.p10 + 4, addrspace 1) +; CHECK-NEXT: [[Reg96:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg27]]:vreg_64, 5, 0, implicit $exec :: (load (s8) from %ir.p10 + 5, addrspace 1) +; CHECK-NEXT: [[Reg97:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg96]]:vgpr_32, 8, killed [[Reg95]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg98:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg27]]:vreg_64, 6, 0, implicit $exec :: (load (s8) from %ir.p10 + 6, addrspace 1) +; CHECK-NEXT: [[Reg99:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg27]]:vreg_64, 7, 0, implicit $exec :: (load (s8) from %ir.p10 + 7, addrspace 1) +; CHECK-NEXT: [[Reg100:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg99]]:vgpr_32, 8, killed [[Reg98]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg101:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg100]]:vgpr_32, 16, killed [[Reg97]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg102:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[Reg87]]:vgpr_32, [[Reg101]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg103:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[Reg102]]:vgpr_32, [[Reg94]]:vgpr_32, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_DWORD [[Reg23]]:vreg_64, killed [[Reg103]]:vgpr_32, 0, 0, 
implicit $exec :: (store (s32) into %ir.p6, addrspace 1) +; CHECK-NEXT: GLOBAL_STORE_DWORD [[Reg28]]:vreg_64, killed [[Reg102]]:vgpr_32, 0, 0, implicit $exec :: (store (s32) into %ir.p8, addrspace 1) +; CHECK-NEXT: GLOBAL_STORE_DWORD [[Reg29]]:vreg_64, [[Reg101]]:vgpr_32, 0, 0, implicit $exec :: (store (s32) into %ir.p9, addrspace 1) +; CHECK-NEXT: GLOBAL_STORE_DWORD killed [[Reg27]]:vreg_64, [[Reg87]]:vgpr_32, 0, 0, implicit $exec :: (store (s32) into %ir.p10, addrspace 1) +; CHECK-NEXT: [[Reg104:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[Reg87]]:vgpr_32, [[Reg87]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg105:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[Reg87]]:vgpr_32, [[Reg101]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg106:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg101]]:vgpr_32, [[Reg94]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg107:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 killed [[Reg87]]:vgpr_32, killed [[Reg94]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg108:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[Reg35]]:vgpr_32, %subreg.sub0, undef [[Reg109:%[0-9]+]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg110:%[0-9]+]]:vreg_64, $sgpr_null = V_MAD_U64_U32_e64 [[Reg104]]:vgpr_32, killed [[Reg36]]:vgpr_32, killed [[Reg108]]:vreg_64, 0, implicit $exec +; CHECK-NEXT: [[Reg111:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[Reg105]]:vgpr_32, %subreg.sub0, undef [[Reg112:%[0-9]+]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg113:%[0-9]+]]:vreg_64, $sgpr_null = V_MAD_U64_U32_e64 killed [[Reg110]].sub0:vreg_64, killed [[Reg52]]:vgpr_32, killed [[Reg111]]:vreg_64, 0, implicit $exec +; CHECK-NEXT: [[Reg114:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[Reg113]].sub0:vreg_64, [[Reg106]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg115:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg113]].sub0:vreg_64, [[Reg107]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg116:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg104]]:vgpr_32, [[Reg106]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg117:%[0-9]+]]:sreg_64 = 
SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @array2, target-flags(amdgpu-gotprel32-hi) @array2, implicit-def dead $scc +; CHECK-NEXT: [[Reg118:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[Reg117]]:sreg_64, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) +; CHECK-NEXT: [[Reg119:%[0-9]+]]:vreg_64 = COPY killed [[Reg118]]:sreg_64_xexec +; CHECK-NEXT: [[Reg120:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[Reg119]]:vreg_64, 20, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array2, i64 20)`) +; CHECK-NEXT: [[Reg121:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[Reg120]]:vgpr_32, [[Reg107]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg122:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @array4, target-flags(amdgpu-gotprel32-hi) @array4, implicit-def dead $scc +; CHECK-NEXT: [[Reg123:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[Reg122]]:sreg_64, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) +; CHECK-NEXT: [[Reg124:%[0-9]+]]:vreg_64 = COPY killed [[Reg123]]:sreg_64_xexec +; CHECK-NEXT: FLAT_STORE_DWORD [[Reg124]]:vreg_64, [[Reg121]]:vgpr_32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr getelementptr inbounds nuw (i8, ptr @array4, i64 4)`) +; CHECK-NEXT: [[Reg125:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg24]]:vreg_64, 0, 0, implicit $exec :: (load (s8) from %ir.p7, addrspace 1) +; CHECK-NEXT: [[Reg126:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg24]]:vreg_64, 1, 0, implicit $exec :: (load (s8) from %ir.p7 + 1, addrspace 1) +; CHECK-NEXT: [[Reg127:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg126]]:vgpr_32, 8, killed [[Reg125]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg128:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg24]]:vreg_64, 2, 0, implicit $exec :: (load (s8) from %ir.p7 + 2, addrspace 1) +; CHECK-NEXT: [[Reg129:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg24]]:vreg_64, 3, 0, implicit $exec :: (load (s8) from 
%ir.p7 + 3, addrspace 1) +; CHECK-NEXT: [[Reg130:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg129]]:vgpr_32, 8, killed [[Reg128]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg131:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg130]]:vgpr_32, 16, killed [[Reg127]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg132:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg24]]:vreg_64, 28, 0, implicit $exec :: (load (s8) from %ir.p7 + 28, addrspace 1) +; CHECK-NEXT: [[Reg133:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg24]]:vreg_64, 29, 0, implicit $exec :: (load (s8) from %ir.p7 + 29, addrspace 1) +; CHECK-NEXT: [[Reg134:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg133]]:vgpr_32, 8, killed [[Reg132]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg135:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg24]]:vreg_64, 30, 0, implicit $exec :: (load (s8) from %ir.p7 + 30, addrspace 1) +; CHECK-NEXT: [[Reg136:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg24]]:vreg_64, 31, 0, implicit $exec :: (load (s8) from %ir.p7 + 31, addrspace 1) +; CHECK-NEXT: [[Reg137:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg136]]:vgpr_32, 8, killed [[Reg135]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg138:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg137]]:vgpr_32, 16, killed [[Reg134]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg139:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg24]]:vreg_64, 24, 0, implicit $exec :: (load (s8) from %ir.p7 + 24, addrspace 1) +; CHECK-NEXT: [[Reg140:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg24]]:vreg_64, 25, 0, implicit $exec :: (load (s8) from %ir.p7 + 25, addrspace 1) +; CHECK-NEXT: [[Reg141:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg140]]:vgpr_32, 8, killed [[Reg139]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg142:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg24]]:vreg_64, 26, 0, implicit $exec :: (load (s8) from %ir.p7 + 26, addrspace 1) +; CHECK-NEXT: [[Reg143:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg24]]:vreg_64, 27, 0, implicit $exec :: (load (s8) from %ir.p7 + 
27, addrspace 1) +; CHECK-NEXT: [[Reg144:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg143]]:vgpr_32, 8, killed [[Reg142]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg145:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg144]]:vgpr_32, 16, killed [[Reg141]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg146:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[Reg119]]:vreg_64, 28, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array2, i64 28)`) +; CHECK-NEXT: [[Reg147:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[Reg146]]:vgpr_32, [[Reg116]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg148:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[Reg121]]:vgpr_32, [[Reg106]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg149:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[Reg107]]:vgpr_32, [[Reg120]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg150:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[Reg149]]:vgpr_32, [[Reg147]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg151:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[Reg148]]:vgpr_32, [[Reg150]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg152:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[Reg151]]:vgpr_32, killed [[Reg145]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg153:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @array3, target-flags(amdgpu-gotprel32-hi) @array3, implicit-def dead $scc +; CHECK-NEXT: [[Reg154:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[Reg153]]:sreg_64, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) +; CHECK-NEXT: [[Reg155:%[0-9]+]]:vreg_64 = COPY killed [[Reg154]]:sreg_64_xexec +; CHECK-NEXT: FLAT_STORE_DWORD [[Reg155]]:vreg_64, [[Reg152]]:vgpr_32, 68, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr getelementptr inbounds nuw (i8, ptr @array3, i64 68)`) +; CHECK-NEXT: [[Reg156:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @array5, target-flags(amdgpu-gotprel32-hi) @array5, implicit-def dead $scc +; CHECK-NEXT: 
[[Reg157:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[Reg156]]:sreg_64, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) +; CHECK-NEXT: [[Reg158:%[0-9]+]]:vreg_64 = COPY killed [[Reg157]]:sreg_64_xexec +; CHECK-NEXT: [[Reg159:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[Reg158]]:vreg_64, 20, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array5, i64 20)`) +; CHECK-NEXT: [[Reg160:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg159]]:vgpr_32, [[Reg138]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg161:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[Reg160]]:vgpr_32, [[Reg105]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg162:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[Reg120]]:vgpr_32, [[Reg138]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg163:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[Reg107]]:vgpr_32, killed [[Reg106]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg164:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[Reg161]]:vgpr_32, [[Reg148]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg165:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[Reg160]]:vgpr_32, [[Reg151]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg166:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[Reg138]]:vgpr_32, [[Reg107]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg167:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[Reg162]]:vgpr_32, [[Reg146]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg168:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @array6, target-flags(amdgpu-gotprel32-hi) @array6, implicit-def dead $scc +; CHECK-NEXT: [[Reg169:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[Reg168]]:sreg_64, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) +; CHECK-NEXT: [[Reg170:%[0-9]+]]:vreg_64 = COPY killed [[Reg169]]:sreg_64_xexec +; CHECK-NEXT: [[Reg171:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[Reg170]]:vreg_64, 44, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array6, i64 44)`) +; 
CHECK-NEXT: [[Reg172:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg171]]:vgpr_32, [[Reg162]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg173:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @array7, target-flags(amdgpu-gotprel32-hi) @array7, implicit-def dead $scc +; CHECK-NEXT: [[Reg174:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[Reg173]]:sreg_64, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) +; CHECK-NEXT: [[Reg175:%[0-9]+]]:vreg_64 = COPY killed [[Reg174]]:sreg_64_xexec +; CHECK-NEXT: [[Reg176:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[Reg175]]:vreg_64, 20, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array7, i64 20)`) +; CHECK-NEXT: [[Reg177:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @array8, target-flags(amdgpu-gotprel32-hi) @array8, implicit-def dead $scc +; CHECK-NEXT: [[Reg178:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[Reg177]]:sreg_64, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) +; CHECK-NEXT: [[Reg179:%[0-9]+]]:vreg_64 = COPY killed [[Reg178]]:sreg_64_xexec +; CHECK-NEXT: [[Reg180:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[Reg179]]:vreg_64, 44, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array8, i64 44)`, align 8) +; CHECK-NEXT: [[Reg181:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @array9, target-flags(amdgpu-gotprel32-hi) @array9, implicit-def dead $scc +; CHECK-NEXT: [[Reg182:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[Reg181]]:sreg_64, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) +; CHECK-NEXT: [[Reg183:%[0-9]+]]:vreg_64 = COPY killed [[Reg182]]:sreg_64_xexec +; CHECK-NEXT: [[Reg184:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD killed [[Reg183]]:vreg_64, 24, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array9, i64 
24)`) +; CHECK-NEXT: [[Reg185:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[Reg184]]:vgpr_32, [[Reg152]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg186:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[Reg119]]:vreg_64, 84, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array2, i64 84)`) +; CHECK-NEXT: [[Reg187:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[Reg186]]:vgpr_32, killed [[Reg151]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg188:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[Reg155]]:vreg_64, 80, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array3, i64 80)`) +; CHECK-NEXT: [[Reg189:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[Reg124]]:vreg_64, 80, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array4, i64 80)`, align 8) +; CHECK-NEXT: [[Reg190:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[Reg158]]:vreg_64, 88, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array5, i64 88)`) +; CHECK-NEXT: [[Reg191:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[Reg190]]:vgpr_32, killed [[Reg148]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg192:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD killed [[Reg179]]:vreg_64, 20, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array8, i64 20)`) +; CHECK-NEXT: [[Reg193:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[Reg192]]:vgpr_32, killed [[Reg147]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg194:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[Reg124]]:vreg_64, 8, 0, implicit $exec, implicit $flat_scr :: (dereferenceable load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array4, i64 8)`) +; CHECK-NEXT: [[Reg195:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD killed [[Reg119]]:vreg_64, 12, 0, implicit $exec, implicit $flat_scr :: (dereferenceable load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array2, i64 12)`) +; CHECK-NEXT: 
[[Reg196:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[Reg155]]:vreg_64, 4, 0, implicit $exec, implicit $flat_scr :: (dereferenceable load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array3, i64 4)`) +; CHECK-NEXT: [[Reg197:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD killed [[Reg158]]:vreg_64, 4, 0, implicit $exec, implicit $flat_scr :: (dereferenceable load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array5, i64 4)`) +; CHECK-NEXT: [[Reg198:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg197]]:vgpr_32, [[Reg160]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg199:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg32]]:vreg_64, 16, 0, implicit $exec :: (load (s8) from %ir.p4 + 16, addrspace 1) +; CHECK-NEXT: [[Reg200:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg32]]:vreg_64, 17, 0, implicit $exec :: (load (s8) from %ir.p4 + 17, addrspace 1) +; CHECK-NEXT: [[Reg201:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg200]]:vgpr_32, 8, killed [[Reg199]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg202:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg32]]:vreg_64, 18, 0, implicit $exec :: (load (s8) from %ir.p4 + 18, addrspace 1) +; CHECK-NEXT: [[Reg203:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg32]]:vreg_64, 19, 0, implicit $exec :: (load (s8) from %ir.p4 + 19, addrspace 1) +; CHECK-NEXT: [[Reg204:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg203]]:vgpr_32, 8, killed [[Reg202]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg205:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg204]]:vgpr_32, 16, killed [[Reg201]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg206:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg32]]:vreg_64, 12, 0, implicit $exec :: (load (s8) from %ir.p4 + 12, addrspace 1) +; CHECK-NEXT: [[Reg207:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg32]]:vreg_64, 13, 0, implicit $exec :: (load (s8) from %ir.p4 + 13, addrspace 1) +; CHECK-NEXT: [[Reg208:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg207]]:vgpr_32, 8, killed [[Reg206]]:vgpr_32, implicit $exec +; CHECK-NEXT: 
[[Reg209:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg32]]:vreg_64, 14, 0, implicit $exec :: (load (s8) from %ir.p4 + 14, addrspace 1) +; CHECK-NEXT: [[Reg210:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE killed [[Reg32]]:vreg_64, 15, 0, implicit $exec :: (load (s8) from %ir.p4 + 15, addrspace 1) +; CHECK-NEXT: [[Reg211:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg210]]:vgpr_32, 8, killed [[Reg209]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg212:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg211]]:vgpr_32, 16, killed [[Reg208]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg213:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg212]]:vgpr_32, killed [[Reg162]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg214:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 [[Reg205]]:vgpr_32, [[Reg161]]:vgpr_32, killed [[Reg213]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg215:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[Reg198]]:vgpr_32, 0, 0, implicit $mode, implicit $exec +; CHECK-NEXT: [[Reg216:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_IFLAG_F32_e64 0, killed [[Reg215]]:vgpr_32, 0, 0, implicit $mode, implicit $exec +; CHECK-NEXT: [[Reg217:%[0-9]+]]:vgpr_32 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_MUL_F32_e64 0, 1333788670, 0, killed [[Reg216]]:vgpr_32, 0, 0, implicit $mode, implicit $exec +; CHECK-NEXT: [[Reg218:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, killed [[Reg217]]:vgpr_32, 0, 0, implicit $mode, implicit $exec +; CHECK-NEXT: [[Reg219:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 0, [[Reg198]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg220:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg219]]:vgpr_32, [[Reg218]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg221:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[Reg218]]:vgpr_32, killed [[Reg220]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg222:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[Reg218]]:vgpr_32, killed [[Reg221]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg223:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[Reg214]]:vgpr_32, killed 
[[Reg222]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg224:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[Reg223]]:vgpr_32, [[Reg198]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg225:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 killed [[Reg214]]:vgpr_32, killed [[Reg224]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg226:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_GE_U32_e64 [[Reg225]]:vgpr_32, [[Reg198]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg227:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 1, [[Reg223]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg228:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, killed [[Reg223]]:vgpr_32, 0, killed [[Reg227]]:vgpr_32, [[Reg226]]:sreg_32_xm0_xexec, implicit $exec +; CHECK-NEXT: [[Reg229:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[Reg225]]:vgpr_32, [[Reg198]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg230:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, killed [[Reg225]]:vgpr_32, 0, killed [[Reg229]]:vgpr_32, killed [[Reg226]]:sreg_32_xm0_xexec, implicit $exec +; CHECK-NEXT: [[Reg231:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_GE_U32_e64 killed [[Reg230]]:vgpr_32, killed [[Reg198]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg232:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 1, [[Reg228]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg233:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, killed [[Reg228]]:vgpr_32, 0, killed [[Reg232]]:vgpr_32, killed [[Reg231]]:sreg_32_xm0_xexec, implicit $exec +; CHECK-NEXT: [[Reg234:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 killed [[Reg196]]:vgpr_32, [[Reg121]]:vgpr_32, killed [[Reg233]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg235:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg234]]:vgpr_32, killed [[Reg190]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg236:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 killed [[Reg235]]:vgpr_32, killed [[Reg184]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg237:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 killed [[Reg195]]:vgpr_32, [[Reg152]]:vgpr_32, killed [[Reg236]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg238:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed 
[[Reg114]]:vgpr_32, %subreg.sub0, undef [[Reg239:%[0-9]+]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg240:%[0-9]+]]:vreg_64, $sgpr_null = V_MAD_U64_U32_e64 killed [[Reg237]]:vgpr_32, killed [[Reg115]]:vgpr_32, killed [[Reg238]]:vreg_64, 0, implicit $exec +; CHECK-NEXT: [[Reg241:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 killed [[Reg240]].sub0:vreg_64, [[Reg83]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg242:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[Reg138]]:vgpr_32, 0, 0, implicit $mode, implicit $exec +; CHECK-NEXT: [[Reg243:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_IFLAG_F32_e64 0, killed [[Reg242]]:vgpr_32, 0, 0, implicit $mode, implicit $exec +; CHECK-NEXT: [[Reg244:%[0-9]+]]:vgpr_32 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_MUL_F32_e64 0, 1333788670, 0, killed [[Reg243]]:vgpr_32, 0, 0, implicit $mode, implicit $exec +; CHECK-NEXT: [[Reg245:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, killed [[Reg244]]:vgpr_32, 0, 0, implicit $mode, implicit $exec +; CHECK-NEXT: [[Reg246:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 0, [[Reg138]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg247:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg246]]:vgpr_32, [[Reg245]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg248:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[Reg245]]:vgpr_32, killed [[Reg247]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg249:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[Reg245]]:vgpr_32, killed [[Reg248]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg250:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[Reg241]]:vgpr_32, killed [[Reg249]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg251:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[Reg250]]:vgpr_32, [[Reg138]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg252:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 killed [[Reg241]]:vgpr_32, killed [[Reg251]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg253:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_GE_U32_e64 [[Reg252]]:vgpr_32, [[Reg138]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg254:%[0-9]+]]:vgpr_32 = 
V_ADD_U32_e64 1, [[Reg250]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg255:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, killed [[Reg250]]:vgpr_32, 0, killed [[Reg254]]:vgpr_32, [[Reg253]]:sreg_32_xm0_xexec, implicit $exec +; CHECK-NEXT: [[Reg256:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[Reg252]]:vgpr_32, [[Reg138]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg257:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, killed [[Reg252]]:vgpr_32, 0, killed [[Reg256]]:vgpr_32, killed [[Reg253]]:sreg_32_xm0_xexec, implicit $exec +; CHECK-NEXT: [[Reg258:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_GE_U32_e64 killed [[Reg257]]:vgpr_32, killed [[Reg138]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg259:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 1, [[Reg255]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg260:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, killed [[Reg255]]:vgpr_32, 0, killed [[Reg259]]:vgpr_32, killed [[Reg258]]:sreg_32_xm0_xexec, implicit $exec +; CHECK-NEXT: DS_WRITE_B8_D16_HI [[Reg3]]:vgpr_32, [[Reg260]]:vgpr_32, 2, 0, implicit $exec :: (store (s8) into %ir.p2 + 2, addrspace 3) +; CHECK-NEXT: DS_WRITE_B8_gfx9 [[Reg3]]:vgpr_32, [[Reg260]]:vgpr_32, 0, 0, implicit $exec :: (store (s8) into %ir.p2, addrspace 3) +; CHECK-NEXT: [[Reg261:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 24, [[Reg260]]:vgpr_32, implicit $exec +; CHECK-NEXT: DS_WRITE_B8_gfx9 [[Reg3]]:vgpr_32, killed [[Reg261]]:vgpr_32, 3, 0, implicit $exec :: (store (s8) into %ir.p2 + 3, addrspace 3) +; CHECK-NEXT: [[Reg262:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 8, [[Reg260]]:vgpr_32, implicit $exec +; CHECK-NEXT: DS_WRITE_B8_gfx9 killed [[Reg3]]:vgpr_32, killed [[Reg262]]:vgpr_32, 1, 0, implicit $exec :: (store (s8) into %ir.p2 + 1, addrspace 3) +; CHECK-NEXT: [[Reg263:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 killed [[Reg260]]:vgpr_32, killed [[Reg192]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg264:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[Reg180]]:vgpr_32, %subreg.sub0, undef [[Reg265:%[0-9]+]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg266:%[0-9]+]]:vreg_64, 
$sgpr_null = V_MAD_U64_U32_e64 killed [[Reg263]]:vgpr_32, [[Reg205]]:vgpr_32, killed [[Reg264]]:vreg_64, 0, implicit $exec +; CHECK-NEXT: [[Reg267:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[Reg120]]:vgpr_32, %subreg.sub0, undef [[Reg268:%[0-9]+]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg269:%[0-9]+]]:vreg_64, $sgpr_null = V_MAD_U64_U32_e64 killed [[Reg266]].sub0:vreg_64, killed [[Reg146]]:vgpr_32, killed [[Reg267]]:vreg_64, 0, implicit $exec +; CHECK-NEXT: [[Reg270:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 killed [[Reg269]].sub0:vreg_64, [[Reg116]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg271:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg270]]:vgpr_32, %subreg.sub0, undef [[Reg272:%[0-9]+]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg273:%[0-9]+]]:vreg_64, $sgpr_null = V_MAD_U64_U32_e64 killed [[Reg194]]:vgpr_32, [[Reg121]]:vgpr_32, killed [[Reg271]]:vreg_64, 0, implicit $exec +; CHECK-NEXT: [[Reg274:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg273]].sub0:vreg_64, killed [[Reg193]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg275:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[Reg191]]:vgpr_32, 0, 0, implicit $mode, implicit $exec +; CHECK-NEXT: [[Reg276:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_IFLAG_F32_e64 0, killed [[Reg275]]:vgpr_32, 0, 0, implicit $mode, implicit $exec +; CHECK-NEXT: [[Reg277:%[0-9]+]]:vgpr_32 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_MUL_F32_e64 0, 1333788670, 0, killed [[Reg276]]:vgpr_32, 0, 0, implicit $mode, implicit $exec +; CHECK-NEXT: [[Reg278:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, killed [[Reg277]]:vgpr_32, 0, 0, implicit $mode, implicit $exec +; CHECK-NEXT: [[Reg279:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 0, [[Reg191]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg280:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg279]]:vgpr_32, [[Reg278]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg281:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[Reg278]]:vgpr_32, killed [[Reg280]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg282:%[0-9]+]]:vgpr_32 = 
V_ADD_U32_e64 killed [[Reg278]]:vgpr_32, killed [[Reg281]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg283:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[Reg274]]:vgpr_32, killed [[Reg282]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg284:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[Reg283]]:vgpr_32, [[Reg191]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg285:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 killed [[Reg274]]:vgpr_32, killed [[Reg284]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg286:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_GE_U32_e64 [[Reg285]]:vgpr_32, [[Reg191]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg287:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 1, [[Reg283]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg288:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, killed [[Reg283]]:vgpr_32, 0, killed [[Reg287]]:vgpr_32, [[Reg286]]:sreg_32_xm0_xexec, implicit $exec +; CHECK-NEXT: [[Reg289:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[Reg285]]:vgpr_32, [[Reg191]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg290:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, killed [[Reg285]]:vgpr_32, 0, killed [[Reg289]]:vgpr_32, killed [[Reg286]]:sreg_32_xm0_xexec, implicit $exec +; CHECK-NEXT: [[Reg291:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_GE_U32_e64 killed [[Reg290]]:vgpr_32, killed [[Reg191]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg292:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 1, [[Reg288]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg293:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, killed [[Reg288]]:vgpr_32, 0, killed [[Reg292]]:vgpr_32, killed [[Reg291]]:sreg_32_xm0_xexec, implicit $exec +; CHECK-NEXT: [[Reg294:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 killed [[Reg189]]:vgpr_32, killed [[Reg149]]:vgpr_32, killed [[Reg293]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg295:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 killed [[Reg150]]:vgpr_32, killed [[Reg188]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg296:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 killed [[Reg294]]:vgpr_32, killed [[Reg295]]:vgpr_32, killed [[Reg187]]:vgpr_32, implicit $exec +; 
CHECK-NEXT: [[Reg297:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg296]]:vgpr_32, killed [[Reg185]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg298:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 killed [[Reg180]]:vgpr_32, killed [[Reg160]]:vgpr_32, killed [[Reg297]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg299:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 killed [[Reg161]]:vgpr_32, killed [[Reg176]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg300:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 killed [[Reg298]]:vgpr_32, killed [[Reg299]]:vgpr_32, killed [[Reg172]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg301:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg300]]:vgpr_32, killed [[Reg121]]:vgpr_32, implicit $exec +; CHECK-NEXT: FLAT_STORE_DWORD killed [[Reg175]]:vreg_64, [[Reg301]]:vgpr_32, 68, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr getelementptr inbounds nuw (i8, ptr @array7, i64 68)`) +; CHECK-NEXT: [[Reg302:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 killed [[Reg120]]:vgpr_32, killed [[Reg152]]:vgpr_32, killed [[Reg301]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg303:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[Reg167]]:vgpr_32, 0, 0, implicit $mode, implicit $exec +; CHECK-NEXT: [[Reg304:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_IFLAG_F32_e64 0, killed [[Reg303]]:vgpr_32, 0, 0, implicit $mode, implicit $exec +; CHECK-NEXT: [[Reg305:%[0-9]+]]:vgpr_32 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_MUL_F32_e64 0, 1333788670, 0, killed [[Reg304]]:vgpr_32, 0, 0, implicit $mode, implicit $exec +; CHECK-NEXT: [[Reg306:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, killed [[Reg305]]:vgpr_32, 0, 0, implicit $mode, implicit $exec +; CHECK-NEXT: [[Reg307:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 0, [[Reg167]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg308:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg307]]:vgpr_32, [[Reg306]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg309:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[Reg306]]:vgpr_32, killed [[Reg308]]:vgpr_32, implicit $exec +; 
CHECK-NEXT: [[Reg310:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[Reg306]]:vgpr_32, killed [[Reg309]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg311:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[Reg302]]:vgpr_32, killed [[Reg310]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg312:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[Reg311]]:vgpr_32, [[Reg167]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg313:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 killed [[Reg302]]:vgpr_32, killed [[Reg312]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg314:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_GE_U32_e64 [[Reg313]]:vgpr_32, [[Reg167]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg315:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 1, [[Reg311]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg316:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, killed [[Reg311]]:vgpr_32, 0, killed [[Reg315]]:vgpr_32, [[Reg314]]:sreg_32_xm0_xexec, implicit $exec +; CHECK-NEXT: [[Reg317:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[Reg313]]:vgpr_32, [[Reg167]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg318:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, killed [[Reg313]]:vgpr_32, 0, killed [[Reg317]]:vgpr_32, killed [[Reg314]]:sreg_32_xm0_xexec, implicit $exec +; CHECK-NEXT: [[Reg319:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_GE_U32_e64 killed [[Reg318]]:vgpr_32, killed [[Reg167]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg320:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 1, [[Reg316]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg321:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, killed [[Reg316]]:vgpr_32, 0, killed [[Reg320]]:vgpr_32, killed [[Reg319]]:sreg_32_xm0_xexec, implicit $exec +; CHECK-NEXT: [[Reg322:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg321]]:vgpr_32, killed [[Reg166]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg323:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 killed [[Reg322]]:vgpr_32, killed [[Reg165]]:vgpr_32, killed [[Reg164]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg324:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg323]]:vgpr_32, killed [[Reg163]]:vgpr_32, implicit 
$exec +; CHECK-NEXT: [[Reg325:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg324]]:vgpr_32, [[Reg44]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg326:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD killed [[Reg155]]:vreg_64, 84, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array3, i64 84)`) +; CHECK-NEXT: [[Reg327:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[Reg326]]:vgpr_32, killed [[Reg205]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg328:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 killed [[Reg327]]:vgpr_32, killed [[Reg116]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg329:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg325]]:vgpr_32, %subreg.sub0, killed [[Reg328]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: FLAT_STORE_DWORDX2 killed [[Reg124]]:vreg_64, killed [[Reg329]]:vreg_64, 76, 0, implicit $exec, implicit $flat_scr :: (store (s64) into `ptr getelementptr inbounds nuw (i8, ptr @array4, i64 76)`, align 4) +; CHECK-NEXT: [[Reg330:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD killed [[Reg170]]:vreg_64, 28, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array6, i64 28)`) +; CHECK-NEXT: [[Reg331:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[Reg330]]:vgpr_32, killed [[Reg131]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg332:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg331]]:vgpr_32, killed [[Reg86]]:vgpr_32, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_SHORT_D16_HI [[Reg28]]:vreg_64, [[Reg332]]:vgpr_32, 2, 0, implicit $exec :: (store (s16) into %ir.p8 + 2, addrspace 1) +; CHECK-NEXT: GLOBAL_STORE_SHORT killed [[Reg28]]:vreg_64, killed [[Reg332]]:vgpr_32, 0, 0, implicit $exec :: (store (s16) into %ir.p8, addrspace 1) +; EMPTY: +; CHECK: bb.2.bb2: +; CHECK-NEXT: ; predecessors: %bb.0, %bb.1 +; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.3(0x40000000); %bb.5(50.00%), %bb.3(50.00%) +; EMPTY: +; CHECK: [[Reg333:%[0-9]+]]:vgpr_32 = PHI [[Reg59]]:vgpr_32, %bb.0, [[Reg105]]:vgpr_32, %bb.1 +; 
CHECK-NEXT: [[Reg334:%[0-9]+]]:vgpr_32 = PHI [[Reg66]]:vgpr_32, %bb.0, [[Reg326]]:vgpr_32, %bb.1 +; CHECK-NEXT: [[Reg335:%[0-9]+]]:vgpr_32 = PHI [[Reg82]]:vgpr_32, %bb.0, [[Reg107]]:vgpr_32, %bb.1 +; CHECK-NEXT: SI_END_CF killed [[Reg85]]:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: [[Reg336:%[0-9]+]]:sreg_32 = V_CMP_GT_U32_e64 [[Reg35]]:vgpr_32, killed [[Reg334]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg337:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; CHECK-NEXT: [[Reg338:%[0-9]+]]:sreg_32 = SI_IF killed [[Reg336]]:sreg_32, %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.5 +; EMPTY: +; CHECK: bb.3.Flow: +; CHECK-NEXT: ; predecessors: %bb.2, %bb.5 +; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.6(0x40000000); %bb.4(50.00%), %bb.6(50.00%) +; EMPTY: +; CHECK: [[Reg339:%[0-9]+]]:sreg_32 = PHI [[Reg337]]:sreg_32, %bb.2, [[Reg340:%[0-9]+]]:sreg_32, %bb.5 +; CHECK-NEXT: [[Reg341:%[0-9]+]]:vgpr_32 = PHI [[Reg333]]:vgpr_32, %bb.2, undef [[Reg342:%[0-9]+]]:vgpr_32, %bb.5 +; CHECK-NEXT: [[Reg343:%[0-9]+]]:vreg_64 = PHI [[Reg23]]:vreg_64, %bb.2, undef [[Reg344:%[0-9]+]]:vreg_64, %bb.5 +; CHECK-NEXT: [[Reg345:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg48]]:vgpr_32, 8, killed [[Reg47]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg346:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg50]]:vgpr_32, 8, killed [[Reg49]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg347:%[0-9]+]]:sreg_32 = SI_ELSE killed [[Reg338]]:sreg_32, %bb.6, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.4 +; EMPTY: +; CHECK: bb.4.bb7: +; CHECK-NEXT: ; predecessors: %bb.3 +; CHECK-NEXT: successors: %bb.6(0x80000000); %bb.6(100.00%) +; EMPTY: +; CHECK: [[Reg348:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[Reg341]]:vgpr_32, [[Reg44]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_DWORD killed [[Reg343]]:vreg_64, killed [[Reg348]]:vgpr_32, 0, 0, implicit $exec :: (store 
(s32) into %ir.p6, addrspace 1) +; CHECK-NEXT: [[Reg349:%[0-9]+]]:sreg_32 = S_OR_B32 killed [[Reg339]]:sreg_32, $exec_lo, implicit-def dead $scc +; CHECK-NEXT: S_BRANCH %bb.6 +; EMPTY: +; CHECK: bb.5.bb8: +; CHECK-NEXT: ; predecessors: %bb.2 +; CHECK-NEXT: successors: %bb.3(0x80000000); %bb.3(100.00%) +; EMPTY: +; CHECK: [[Reg350:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[Reg335]]:vgpr_32, killed [[Reg35]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_DWORD killed [[Reg24]]:vreg_64, killed [[Reg350]]:vgpr_32, 0, 0, implicit $exec :: (store (s32) into %ir.p7, addrspace 1) +; CHECK-NEXT: [[Reg351:%[0-9]+]]:sreg_32 = S_AND_B32 killed [[Reg84]]:sreg_32, $exec_lo, implicit-def dead $scc +; CHECK-NEXT: [[Reg340]]:sreg_32 = COPY killed [[Reg351]]:sreg_32 +; CHECK-NEXT: S_BRANCH %bb.3 +; EMPTY: +; CHECK: bb.6.Flow1: +; CHECK-NEXT: ; predecessors: %bb.3, %bb.4 +; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.8(0x40000000); %bb.7(50.00%), %bb.8(50.00%) +; EMPTY: +; CHECK: [[Reg352:%[0-9]+]]:sreg_32 = PHI [[Reg339]]:sreg_32, %bb.3, [[Reg349]]:sreg_32, %bb.4 +; CHECK-NEXT: SI_END_CF killed [[Reg347]]:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: [[Reg353:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg346]]:vgpr_32, 16, killed [[Reg345]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg354:%[0-9]+]]:sreg_32 = SI_IF killed [[Reg352]]:sreg_32, %bb.8, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.7 +; EMPTY: +; CHECK: bb.7.bb9: +; CHECK-NEXT: ; predecessors: %bb.6 +; CHECK-NEXT: successors: %bb.8(0x80000000); %bb.8(100.00%) +; EMPTY: +; CHECK: [[Reg355:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[Reg37]]:vgpr_32, killed [[Reg44]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_DWORD killed [[Reg30]]:vreg_64, killed [[Reg355]]:vgpr_32, 0, 0, implicit $exec :: (store (s32) into %ir.p1, addrspace 1) +; EMPTY: +; CHECK: bb.8.bb10: +; CHECK-NEXT: ; predecessors: %bb.6, %bb.7 +; EMPTY: 
+; CHECK: SI_END_CF killed [[Reg354]]:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: [[Reg356:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[Reg353]]:vgpr_32, killed [[Reg83]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_DWORD killed [[Reg29]]:vreg_64, killed [[Reg356]]:vgpr_32, 0, 0, implicit $exec :: (store (s32) into %ir.p9, addrspace 1) +; CHECK-NEXT: S_ENDPGM 0 +; EMPTY: +; CHECK: # End machine code for function test. +; EMPTY: +; CHECK: Next-use distance of Register [[Reg22]] = 88.0 +; CHECK-NEXT: Next-use distance of Register [[Reg21]] = 24.0 +; CHECK-NEXT: Next-use distance of Register [[Reg20]] = 23.0 +; CHECK-NEXT: Next-use distance of Register [[Reg19]] = 23.0 +; CHECK-NEXT: Next-use distance of Register [[Reg18]] = 22.0 +; CHECK-NEXT: Next-use distance of Register [[Reg17]] = 23.0 +; CHECK-NEXT: Next-use distance of Register [[Reg16]] = 22.0 +; CHECK-NEXT: Next-use distance of Register [[Reg15]] = 20.0 +; CHECK-NEXT: Next-use distance of Register [[Reg14]] = 19.0 +; CHECK-NEXT: Next-use distance of Register [[Reg13]] = 14.0 +; CHECK-NEXT: Next-use distance of Register [[Reg12]] = 13.0 +; CHECK-NEXT: Next-use distance of Register [[Reg11]] = 11.0 +; CHECK-NEXT: Next-use distance of Register [[Reg10]] = 10.0 +; CHECK-NEXT: Next-use distance of Register [[Reg9]] = 17.0 +; CHECK-NEXT: Next-use distance of Register [[Reg8]] = 16.0 +; CHECK-NEXT: Next-use distance of Register [[Reg7]] = 16.0 +; CHECK-NEXT: Next-use distance of Register [[Reg6]] = 15.0 +; CHECK-NEXT: Next-use distance of Register [[Reg5]] = 7.0 +; CHECK-NEXT: Next-use distance of Register [[Reg4]] = 6.0 +; CHECK-NEXT: Next-use distance of Register [[Reg3]] = 35.0 +; CHECK-NEXT: Next-use distance of Register [[Reg2]] = 9.0 +; CHECK-NEXT: Next-use distance of Register [[Reg1]] = 8.0 +; CHECK-NEXT: Next-use distance of Register [[Reg23]] = 79.0 +; CHECK-NEXT: Next-use distance of Register [[Reg24]] = 63.0 +; CHECK-NEXT: Next-use distance of Register 
[[Reg25]] = 37.0 +; CHECK-NEXT: Next-use distance of Register [[Reg26]] = 26.0 +; CHECK-NEXT: Next-use distance of Register [[Reg27]] = 20.0 +; CHECK-NEXT: Next-use distance of Register [[Reg28]] = 17.0 +; CHECK-NEXT: Next-use distance of Register [[Reg29]] = 9.0 +; CHECK-NEXT: Next-use distance of Register [[Reg30]] = 7.0 +; CHECK-NEXT: Next-use distance of Register [[Reg31]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg32]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg33]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg34]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg35]] = 16.0 +; CHECK-NEXT: Next-use distance of Register [[Reg36]] = 15.0 +; CHECK-NEXT: Next-use distance of Register [[Reg37]] = 75.0 +; CHECK-NEXT: Next-use distance of Register [[Reg38]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg39]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg40]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg41]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg42]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg43]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg44]] = 63.0 +; CHECK-NEXT: Next-use distance of Register [[Reg45]] = 47.0 +; CHECK-NEXT: Next-use distance of Register [[Reg46]] = 46.0 +; CHECK-NEXT: Next-use distance of Register [[Reg47]] = 56.0 +; CHECK-NEXT: Next-use distance of Register [[Reg48]] = 55.0 +; CHECK-NEXT: Next-use distance of Register [[Reg49]] = 55.0 +; CHECK-NEXT: Next-use distance of Register [[Reg50]] = 54.0 +; CHECK-NEXT: Next-use distance of Register [[Reg51]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg52]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg53]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg54]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg55]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg56]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg57]] = 2.0 +; CHECK-NEXT: 
Next-use distance of Register [[Reg58]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg59]] = 22.0 +; CHECK-NEXT: Next-use distance of Register [[Reg60]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg61]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg62]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg63]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg64]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg65]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg66]] = 25.0 +; CHECK-NEXT: Next-use distance of Register [[Reg67]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg68]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg69]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg70]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg71]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg72]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg73]] = 9.0 +; CHECK-NEXT: Next-use distance of Register [[Reg74]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg75]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg76]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg77]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg78]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg79]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg80]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg81]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg82]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg83]] = 27.0 +; CHECK-NEXT: Next-use distance of Register [[Reg84]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg85]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg86]] = 252.0 +; CHECK-NEXT: Next-use distance of Register [[Reg87]] = 15.0 +; CHECK-NEXT: Next-use distance of Register [[Reg88]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg89]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg90]] = 
4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg91]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg92]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg93]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg94]] = 9.0 +; CHECK-NEXT: Next-use distance of Register [[Reg95]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg96]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg97]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg98]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg99]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg100]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg101]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg102]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg103]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg104]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg105]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg106]] = 6.0 +; CHECK-NEXT: Next-use distance of Register [[Reg107]] = 6.0 +; CHECK-NEXT: Next-use distance of Register [[Reg108]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg110]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg111]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg113]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg114]] = 126.0 +; CHECK-NEXT: Next-use distance of Register [[Reg115]] = 126.0 +; CHECK-NEXT: Next-use distance of Register [[Reg116]] = 32.0 +; CHECK-NEXT: Next-use distance of Register [[Reg117]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg118]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg119]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg120]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg121]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg122]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg123]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg124]] = 1.0 +; CHECK-NEXT: 
Next-use distance of Register [[Reg125]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg126]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg127]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg128]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg129]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg130]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg131]] = 203.0 +; CHECK-NEXT: Next-use distance of Register [[Reg132]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg133]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg134]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg135]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg136]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg137]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg138]] = 23.0 +; CHECK-NEXT: Next-use distance of Register [[Reg139]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg140]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg141]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg142]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg143]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg144]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg145]] = 7.0 +; CHECK-NEXT: Next-use distance of Register [[Reg146]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg147]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg148]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg149]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg150]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg151]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg152]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg153]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg154]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg155]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg156]] = 1.0 +; CHECK-NEXT: Next-use 
distance of Register [[Reg157]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg158]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg159]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg160]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg161]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg162]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg163]] = 162.0 +; CHECK-NEXT: Next-use distance of Register [[Reg164]] = 160.0 +; CHECK-NEXT: Next-use distance of Register [[Reg165]] = 159.0 +; CHECK-NEXT: Next-use distance of Register [[Reg166]] = 157.0 +; CHECK-NEXT: Next-use distance of Register [[Reg167]] = 137.0 +; CHECK-NEXT: Next-use distance of Register [[Reg168]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg169]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg170]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg171]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg172]] = 128.0 +; CHECK-NEXT: Next-use distance of Register [[Reg173]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg174]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg175]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg176]] = 123.0 +; CHECK-NEXT: Next-use distance of Register [[Reg177]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg178]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg179]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg180]] = 87.0 +; CHECK-NEXT: Next-use distance of Register [[Reg181]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg182]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg183]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg184]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg185]] = 112.0 +; CHECK-NEXT: Next-use distance of Register [[Reg186]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg187]] = 109.0 +; CHECK-NEXT: Next-use distance of Register [[Reg188]] = 107.0 +; CHECK-NEXT: 
Next-use distance of Register [[Reg189]] = 105.0 +; CHECK-NEXT: Next-use distance of Register [[Reg190]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg191]] = 84.0 +; CHECK-NEXT: Next-use distance of Register [[Reg192]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg193]] = 81.0 +; CHECK-NEXT: Next-use distance of Register [[Reg194]] = 79.0 +; CHECK-NEXT: Next-use distance of Register [[Reg195]] = 42.0 +; CHECK-NEXT: Next-use distance of Register [[Reg196]] = 38.0 +; CHECK-NEXT: Next-use distance of Register [[Reg197]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg198]] = 17.0 +; CHECK-NEXT: Next-use distance of Register [[Reg199]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg200]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg201]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg202]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg203]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg204]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg205]] = 9.0 +; CHECK-NEXT: Next-use distance of Register [[Reg206]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg207]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg208]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg209]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg210]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg211]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg212]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg213]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg214]] = 9.0 +; CHECK-NEXT: Next-use distance of Register [[Reg215]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg216]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg217]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg218]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg219]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg220]] = 1.0 +; CHECK-NEXT: 
Next-use distance of Register [[Reg221]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg222]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg223]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg224]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg225]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg226]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg227]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg228]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg229]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg230]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg231]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg232]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg233]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg234]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg235]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg236]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg237]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg238]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg240]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg241]] = 9.0 +; CHECK-NEXT: Next-use distance of Register [[Reg242]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg243]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg244]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg245]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg246]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg247]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg248]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg249]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg250]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg251]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg252]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg253]] = 2.0 +; CHECK-NEXT: Next-use 
distance of Register [[Reg254]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg255]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg256]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg257]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg258]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg259]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg260]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg261]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg262]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg263]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg264]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg266]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg267]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg269]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg270]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg271]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg273]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg274]] = 9.0 +; CHECK-NEXT: Next-use distance of Register [[Reg275]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg276]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg277]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg278]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg279]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg280]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg281]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg282]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg283]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg284]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg285]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg286]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg287]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg288]] = 4.0 +; CHECK-NEXT: Next-use distance of 
Register [[Reg289]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg290]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg291]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg292]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg293]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg294]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg295]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg296]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg297]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg298]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg299]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg300]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg301]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg302]] = 9.0 +; CHECK-NEXT: Next-use distance of Register [[Reg303]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg304]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg305]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg306]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg307]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg308]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg309]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg310]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg311]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg312]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg313]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg314]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg315]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg316]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg317]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg318]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg319]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg320]] = 1.0 +; CHECK-NEXT: Next-use distance of Register 
[[Reg321]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg322]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg323]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg324]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg325]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg326]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg327]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg328]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg329]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg330]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg331]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg332]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg333]] = 9.0 +; CHECK-NEXT: Next-use distance of Register [[Reg334]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg335]] = 6.0 +; CHECK-NEXT: Next-use distance of Register [[Reg336]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg337]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg338]] = 7.0 +; CHECK-NEXT: Next-use distance of Register [[Reg339]] = 7.0 +; CHECK-NEXT: Next-use distance of Register [[Reg341]] = 6.0 +; CHECK-NEXT: Next-use distance of Register [[Reg343]] = 6.0 +; CHECK-NEXT: Next-use distance of Register [[Reg345]] = 6.0 +; CHECK-NEXT: Next-use distance of Register [[Reg346]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg347]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg348]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg349]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg350]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg351]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg340]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg352]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg353]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg354]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg355]] = 
1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg356]] = 1.0 +entry: + %ld = load i32, ptr addrspace(1) %p4, align 2 + %ld0 = load i32, ptr addrspace(1) %p5, align 8 + %ld1 = load i32, ptr addrspace(1) %p1, align 4 + %ld2 = load i32, ptr addrspace(1) %p9, align 1 + %ld8 = load i32, ptr addrspace(1) %p8, align 2 + %ld6 = load i32, ptr addrspace(1) %p6, align 4 + %ld10 = load i32, ptr addrspace(1) %p10, align 1 + %ld11 = load i32, ptr addrspace(1) %p11, align 1 + %tmp0 = sub i32 %ld, %ld0 + store i32 %tmp0, ptr addrspace(1) %p11 + %tmp1 = add i32 %ld, %ld0 + store i32 %tmp1, ptr addrspace(1) %p1 + %ld3 = load i32, ptr addrspace(3) %p2, align 1 + %load4 = load <8 x i32>, ptr addrspace(1) %p3, align 1 + %extract11 = extractelement < 8 x i32> %load4, i32 0 + %extract12 = extractelement < 8 x i32> %load4, i32 1 + %extract13 = extractelement < 8 x i32> %load4, i32 2 + %extract14 = extractelement < 8 x i32> %load4, i32 3 + %extract15 = extractelement < 8 x i32> %load4, i32 4 + %extract16 = extractelement < 8 x i32> %load4, i32 5 + %extract17 = extractelement < 8 x i32> %load4, i32 6 + %extract18 = extractelement < 8 x i32> %load4, i32 7 + %tmp70 = mul i32 %ld3, %extract11 + %tmp71 = add i32 %tmp70, %extract12 + %tmp72 = sub i32 %tmp71, %ld0 + store i32 %tmp71, ptr addrspace(1) %p3 + %ld9 = load i32, ptr addrspace(1) %p9 + store i32 %ld3, ptr addrspace(1) %p7 + store i32 %tmp70, ptr addrspace(1) %p9 + %cond1 = icmp uge i32 %ld0, %arg1 + br i1 %cond1, label %bb1, label %bb2 + +bb1: + %load1 = load i32, ptr addrspace(1) %p3, align 4 + %load2 = load <8 x i32>, ptr addrspace(1) %p10, align 1 + %extract1 = extractelement < 8 x i32> %load2, i32 1 + %extract2 = extractelement < 8 x i32> %load2, i32 2 + %tmp84 = add i32 %load1, %extract1 + %tmp85 = mul i32 %tmp84, %extract2 + store i32 %tmp85, ptr addrspace(1) %p6 + store i32 %tmp84, ptr addrspace(1) %p8 + store i32 %extract1, ptr addrspace(1) %p9 + store i32 %load1, ptr addrspace(1) %p10 + %tmp101 = mul i32 %load1, %load1 + 
%tmp102 = sub i32 %load1, %extract1 + %tmp103 = mul i32 %extract1, %extract2 + %tmp104 = sub i32 %load1, %extract2 + %tmp73 = mul i32 %tmp101, %ld0 + %tmp74 = add i32 %tmp73, %ld + %tmp75 = mul i32 %tmp74, %tmp1 + %tmp76 = add i32 %tmp75, %tmp102 + %tmp77 = sub i32 %tmp76, %tmp103 + %tmp78 = mul i32 %tmp76, %tmp104 + %tmp2 = mul i32 %tmp101, %tmp103 + %idx10 = getelementptr inbounds [5 x i32], [5 x i32]* @array2, i64 1, i64 0 + %val0 = load i32, i32* %idx10, align 4 + %tmp3 = add i32 %val0, %tmp104 + %idx20 = getelementptr inbounds [5 x i32], [5 x i32]* @array4, i64 0, i64 1 + store i32 %tmp3, i32 *%idx20 + %load22 = load <8 x i32>, ptr addrspace(1) %p7, align 1 + %extract3 = extractelement < 8 x i32> %load22, i32 6 + %idx12 = getelementptr inbounds [5 x i32], [5 x i32]* @array2, i64 1, i64 2 + %val2 = load i32, i32* %idx12, align 4 + %tmp4 = mul i32 %val2, %tmp2 + %tmp5= add i32 %tmp3, %tmp103 + %tmp6 = mul i32 %tmp104, %val0 + %tmp7 = sub i32 %tmp6, %tmp4 + %tmp8 = mul i32 %tmp5, %tmp7 + %tmp9 = add i32 %tmp8, %extract3 + %idx22 = getelementptr inbounds [5 x i32], [5 x i32]* @array3, i64 3, i64 2 + store i32 %tmp9, i32 *%idx22 + %extract4 = extractelement < 8 x i32> %load22, i32 7 + %idx13 = getelementptr inbounds [5 x i32], [5 x i32]* @array5, i64 1, i64 0 + %val3 = load i32, i32* %idx13, align 4 + %tmp10 = mul i32 %val3, %extract4 + %tmp11 = add i32 %tmp10, %tmp102 + %tmp12 = sub i32 %val0, %extract4 + %tmp13 = mul i32 %tmp104, %tmp103 + %tmp14 = add i32 %tmp11, %tmp5 + %tmp15 = add i32 %tmp10, %tmp8 + %tmp16 = sub i32 %extract4, %tmp104 + %tmp17 = add i32 %tmp12, %val2 + %tmp18 = add i32 %val0, %tmp9 + %idx601 = getelementptr inbounds [5 x i32], [5 x i32]* @array6, i64 2, i64 1 + %val601 = load i32, i32* %idx601, align 1 + %tmp19 = mul i32 %val601, %tmp12 + %idx701 = getelementptr inbounds [5 x i32], [5 x i32]* @array7, i64 1, i64 0 + %val701 = load i32, i32* %idx701, align 2 + %tmp20 = sub i32 %val701, %tmp11 + %idx801 = getelementptr inbounds [5 x i32], [5 x 
i32]* @array8, i64 2, i64 1 + %val801 = load i32, i32* %idx801, align 8 + %tmp21 = add i32 %val801, %tmp10 + %idx901 = getelementptr inbounds [5 x i32], [5 x i32]* @array9, i64 1, i64 1 + %val901 = load i32, i32* %idx901, align 1 + %tmp22 = mul i32 %val901, %tmp9 + %idx602 = getelementptr inbounds [5 x i32], [5 x i32]* @array2, i64 4, i64 1 + %val602 = load i32, i32* %idx602, align 1 + %tmp23 = add i32 %val602, %tmp8 + %idx702 = getelementptr inbounds [5 x i32], [5 x i32]* @array3, i64 4, i64 0 + %val702 = load i32, i32* %idx702, align 2 + %tmp24 = sub i32 %val702, %tmp7 + %idx802 = getelementptr inbounds [5 x i32], [5 x i32]* @array4, i64 4, i64 0 + %val802 = load i32, i32* %idx802, align 8 + %tmp25 = add i32 %val802, %tmp6 + %idx902 = getelementptr inbounds [5 x i32], [5 x i32]* @array5, i64 4, i64 2 + %val902 = load i32, i32* %idx902, align 1 + %tmp26 = mul i32 %val902, %tmp5 + %idx800 = getelementptr inbounds [5 x i32], [5 x i32]* @array8, i64 1, i64 0 + %val800 = load i32, i32* %idx800, align 4 + %tmp27 = add i32 %val800, %tmp4 + %idx15 = getelementptr inbounds [5 x i32], [5 x i32]* @array4, i64 0, i64 2 + %val5 = load i32, i32* %idx15, align 4 + %tmp28 = mul i32 %val5, %tmp3 + %idx16 = getelementptr inbounds [5 x i32], [5 x i32]* @array2, i64 0, i64 3 + %val6 = load i32, i32* %idx16, align 4 + %tmp206 = add i32 %val6, %tmp9 + %idx17 = getelementptr inbounds [5 x i32], [5 x i32]* @array3, i64 0, i64 1 + %val7 = load i32, i32* %idx17, align 4 + %tmp207 = add i32 %val7, %tmp3 + %idx18 = getelementptr inbounds [5 x i32], [5 x i32]* @array5, i64 0, i64 1 + %val8 = load i32, i32* %idx18, align 4 + %tmp208 = mul i32 %val8, %tmp10 + %load3 = load <8 x i32>, ptr addrspace(1) %p4, align 1 + %extract7 = extractelement < 8 x i32> %load3, i32 4 + %tmp209 = add i32 %extract7, %tmp11 + %extract8 = extractelement < 8 x i32> %load3, i32 3 + %tmp30 = mul i32 %extract8, %tmp12 + %tmp31 = add i32 %tmp30, %tmp209 + %tmp32 = udiv i32 %tmp31, %tmp208 + %tmp33 = add i32 %tmp32, 
%tmp207 + %tmp34 = mul i32 %tmp33, %val902 + %tmp35 = sub i32 %tmp34, %val901 + %tmp36 = add i32 %tmp35, %tmp206 + %tmp37 = mul i32 %tmp36, %tmp78 + %tmp38 = add i32 %tmp37, %tmp77 + %tmp39 = sub i32 %tmp38, %ld9 + %tmp40 = udiv i32 %tmp39, %extract4 + store i32 %tmp40, ptr addrspace(3) %p2, align 1 + %tmp41 = sub i32 %tmp40, %val800 + %tmp42 = mul i32 %tmp41, %extract7 + %tmp43 = add i32 %tmp42, %val801 + %tmp44 = mul i32 %tmp43, %val2 + %tmp45 = add i32 %tmp44, %val0 + %tmp46 = sub i32 %tmp45, %tmp2 + %tmp47 = add i32 %tmp46, %tmp28 + %tmp48 = mul i32 %tmp47, %tmp27 + %tmp49 = udiv i32 %tmp48, %tmp26 + %tmp50 = add i32 %tmp49, %tmp25 + %tmp51 = sub i32 %tmp50, %tmp24 + %tmp52 = add i32 %tmp51, %tmp23 + %tmp53 = mul i32 %tmp52, %tmp22 + %tmp54 = add i32 %tmp53, %tmp21 + %tmp55 = sub i32 %tmp54, %tmp20 + %tmp56 = add i32 %tmp55, %tmp19 + %tmp57 = mul i32 %tmp56, %tmp3 + %idx700 = getelementptr inbounds [5 x i32], [5 x i32]* @array7, i64 3, i64 2 + store i32 %tmp57, i32 *%idx700 + %tmp58 = add i32 %tmp57, %tmp18 + %tmp59 = udiv i32 %tmp58, %tmp17 + %tmp60 = mul i32 %tmp59, %tmp16 + %tmp61 = add i32 %tmp60, %tmp15 + %tmp62 = add i32 %tmp61, %tmp14 + %tmp63 = mul i32 %tmp62, %tmp13 + %tmp64 = mul i32 %tmp63, %ld2 + %idx23 = getelementptr inbounds [5 x i32], [5 x i32]* @array4, i64 3, i64 4 + store i32 %tmp64, i32 *%idx23 + %extract27 = extractelement < 8 x i32> %load3, i32 4 + %idx14 = getelementptr inbounds [5 x i32], [5 x i32]* @array3, i64 4, i64 1 + %val4 = load i32, i32* %idx14, align 4 + %tmp65 = add i32 %val4, %extract27 + %tmp66 = sub i32 %tmp65, %tmp2 + %idx24 = getelementptr inbounds [5 x i32], [5 x i32]* @array4, i64 3, i64 5 + store i32 %tmp66, i32 *%idx24 + %extract9 = extractelement < 8 x i32> %load22, i32 0 + %idx600 = getelementptr inbounds [5 x i32], [5 x i32]* @array6, i64 1, i64 2 + %val600 = load i32, i32* %idx600, align 4 + %tmp67 = add i32 %val600, %extract9 + %extract10 = extractelement < 8 x i32> %load22, i32 5 + %tmp68 = sub i32 %extract10, 
%tmp3 + %tmp69 = add i32 %ld10, %ld6 + %tmp79 = mul i32 %tmp67, %ld8 + store i32 %tmp79, ptr addrspace(1) %p8, align 2 + br label %bb2 + +bb2: + %phi1 = phi i32 [ %load1, %bb1 ], [ %tmp72, %entry ] + %phi2 = phi i32 [ %tmp102, %bb1], [ %ld3, %entry ] + %phi3 = phi i32 [ %val4, %bb1 ], [ %extract13, %entry ] + %phi4 = phi i32 [ %tmp104, %bb1 ], [ %tmp71, %entry ] + %tmp105 = add i32 %phi1, %phi2 + %tmp106 = add i32 %ld8, %phi4 + %tmp107 = mul i32 %tmp105, %tmp106 + %tmp108 = sub i32 %tmp107, %ld6 + %tmp80 = mul i32 %tmp108, %ld2 + %cond3 = icmp ule i32 %ld, %phi3 + br i1 %cond3, label %bb7, label %bb8 + +bb7: + %tmp81 = add i32 %phi2, %ld2 + store i32 %tmp81, ptr addrspace(1) %p6 + br label %bb9 + +bb8: + %tmp82 = add i32 %phi4, %ld + store i32 %tmp82, ptr addrspace(1) %p7 + %xor = xor i1 %cond1, %cond3 + br i1 %xor, label %bb9, label %bb10 + +bb9: + %phi5 = phi i32 [ %tmp81, %bb7], [%tmp82, %bb8] + %tmp83 = add i32 %ld1, %ld2 + store i32 %tmp83, ptr addrspace(1) %p1 + br label %bb10 + +bb10: + %tmp90 = add i32 %ld10, %ld9 + store i32 %tmp90, ptr addrspace(1) %p9, align 4 + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/test_ers_multiple_spills3.ll b/llvm/test/CodeGen/AMDGPU/test_ers_multiple_spills3.ll new file mode 100644 index 0000000000000..62d3305a5010c --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/test_ers_multiple_spills3.ll @@ -0,0 +1,1189 @@ +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -enable-next-use-analysis=true -verify-machineinstrs -dump-next-use-distance < %s 2>&1 | FileCheck %s + +@array2 = global [5 x i32] zeroinitializer, align 4 +@array3 = global [5 x i32] zeroinitializer, align 4 +@array4 = global [5 x i32] zeroinitializer, align 4 +@array5 = global [5 x i32] zeroinitializer, align 4 + +@array6 = global [5 x i32] zeroinitializer, align 4 +@array7 = global [5 x i32] zeroinitializer, align 4 +@array8 = global [5 x i32] zeroinitializer, align 4 +@array9 = global [5 x i32] zeroinitializer, align 4 + +; bb.0.entry +; / | +; bb.1.bb1 | +; \ | +; 
bb.2.bb2 +; | \ +; | bb.3.bb4.preheader +; | | +; | bb.19<-+ +; | +----+ +; | | +; | bb.20.bb14.loopexit +; | / +; bb.18.Flow17 +; / | +; bb.4.bb3 | +; / | | +; bb.10.bb7 | | +; \ | | +; bb.5.Flow16 | +; / | | +; bb.6.bb6 | | +; / | | | +; bb.9.bb9 | | | +; \ | | | +; bb.7.Flow14 | | +; / | | | +; bb.8.bb8 | | | +; \ | | | +; bb.11.Flow15 | | +; \ | | +; bb.13.bb10 | +; / | | +; bb.16.bb12 | | +; \ | | +; bb.14.Flow | +; / | | +; bb.15.bb11 | | +; \ | | +; bb.17.bb13 | +; \ | +; bb.12.Flow18 +; | +; bb.21.bb14 +; +define amdgpu_ps void @test(ptr addrspace(1) %p1, ptr addrspace(3) %p2, i1 %cond1, i1 %cond2, ptr addrspace(1) %p3, ptr addrspace(1) %p4, ptr addrspace(1) %p5, ptr addrspace(1) %p6, ptr addrspace(1) %p7, ptr addrspace(1) %p8, ptr addrspace(1) %p9, i32 %TC1) { +; CHECK-LABEL: # Machine code for function test: IsSSA, TracksLiveness +; CHECK-NEXT: Frame Objects: +; CHECK-NEXT: fi#0: variable sized, align=1, at location [SP] +; CHECK-NEXT: fi#1: variable sized, align=1, at location [SP] +; CHECK-NEXT: save/restore points: +; CHECK-NEXT: save points are empty +; CHECK-NEXT: restore points are empty +; CHECK-NEXT: Function Live Ins: $vgpr0 in [[Reg1:%[0-9]+]], $vgpr1 in [[Reg2:%[0-9]+]], $vgpr2 in [[Reg3:%[0-9]+]], $vgpr3 in [[Reg4:%[0-9]+]], $vgpr4 in [[Reg5:%[0-9]+]], $vgpr5 in [[Reg6:%[0-9]+]], $vgpr6 in [[Reg7:%[0-9]+]], $vgpr7 in [[Reg8:%[0-9]+]], $vgpr8 in [[Reg9:%[0-9]+]], $vgpr9 in [[Reg10:%[0-9]+]], $vgpr10 in [[Reg11:%[0-9]+]], $vgpr11 in [[Reg12:%[0-9]+]], $vgpr12 in [[Reg13:%[0-9]+]], $vgpr13 in [[Reg14:%[0-9]+]], $vgpr14 in [[Reg15:%[0-9]+]], $vgpr15 in [[Reg16:%[0-9]+]], $vgpr16 in [[Reg17:%[0-9]+]], $vgpr17 in [[Reg18:%[0-9]+]], $vgpr18 in [[Reg19:%[0-9]+]], $vgpr19 in [[Reg20:%[0-9]+]] +; EMPTY: +; CHECK: bb.0.entry: +; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000); %bb.1(50.00%), %bb.2(50.00%) +; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, 
$vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19 +; CHECK-NEXT: [[Reg20]]:vgpr_32 = COPY killed $vgpr19 +; CHECK-NEXT: [[Reg19]]:vgpr_32 = COPY killed $vgpr18 +; CHECK-NEXT: [[Reg18]]:vgpr_32 = COPY killed $vgpr17 +; CHECK-NEXT: [[Reg17]]:vgpr_32 = COPY killed $vgpr16 +; CHECK-NEXT: [[Reg16]]:vgpr_32 = COPY killed $vgpr15 +; CHECK-NEXT: [[Reg15]]:vgpr_32 = COPY killed $vgpr14 +; CHECK-NEXT: [[Reg14]]:vgpr_32 = COPY killed $vgpr13 +; CHECK-NEXT: [[Reg13]]:vgpr_32 = COPY killed $vgpr12 +; CHECK-NEXT: [[Reg12]]:vgpr_32 = COPY killed $vgpr11 +; CHECK-NEXT: [[Reg11]]:vgpr_32 = COPY killed $vgpr10 +; CHECK-NEXT: [[Reg10]]:vgpr_32 = COPY killed $vgpr9 +; CHECK-NEXT: [[Reg9]]:vgpr_32 = COPY killed $vgpr8 +; CHECK-NEXT: [[Reg8]]:vgpr_32 = COPY killed $vgpr7 +; CHECK-NEXT: [[Reg7]]:vgpr_32 = COPY killed $vgpr6 +; CHECK-NEXT: [[Reg6]]:vgpr_32 = COPY killed $vgpr5 +; CHECK-NEXT: [[Reg5]]:vgpr_32 = COPY killed $vgpr4 +; CHECK-NEXT: [[Reg4]]:vgpr_32 = COPY killed $vgpr3 +; CHECK-NEXT: [[Reg3]]:vgpr_32 = COPY killed $vgpr2 +; CHECK-NEXT: [[Reg2]]:vgpr_32 = COPY killed $vgpr1 +; CHECK-NEXT: [[Reg1]]:vgpr_32 = COPY killed $vgpr0 +; CHECK-NEXT: [[Reg21:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg6]]:vgpr_32, %subreg.sub0, killed [[Reg7]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg22:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg1]]:vgpr_32, %subreg.sub0, killed [[Reg2]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg23:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 1, killed [[Reg4]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg24:%[0-9]+]]:sreg_32 = V_CMP_EQ_U32_e64 1, killed [[Reg23]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg25:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 1, killed [[Reg5]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg26:%[0-9]+]]:sreg_32 = V_CMP_EQ_U32_e64 1, killed [[Reg25]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg27:%[0-9]+]]:sreg_32 = S_XOR_B32 [[Reg26]]:sreg_32, -1, implicit-def dead $scc +; CHECK-NEXT: [[Reg28:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg22]]:vreg_64, 0, 
0, implicit $exec :: (load (s8) from %ir.p1, addrspace 1) +; CHECK-NEXT: [[Reg29:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg22]]:vreg_64, 1, 0, implicit $exec :: (load (s8) from %ir.p1 + 1, addrspace 1) +; CHECK-NEXT: [[Reg30:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg29]]:vgpr_32, 8, killed [[Reg28]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg31:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg22]]:vreg_64, 2, 0, implicit $exec :: (load (s8) from %ir.p1 + 2, addrspace 1) +; CHECK-NEXT: [[Reg32:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg22]]:vreg_64, 3, 0, implicit $exec :: (load (s8) from %ir.p1 + 3, addrspace 1) +; CHECK-NEXT: [[Reg33:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg32]]:vgpr_32, 8, killed [[Reg31]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg34:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg33]]:vgpr_32, 16, killed [[Reg30]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg35:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg22]]:vreg_64, 12, 0, implicit $exec :: (load (s8) from %ir.gep1, addrspace 1) +; CHECK-NEXT: [[Reg36:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg22]]:vreg_64, 13, 0, implicit $exec :: (load (s8) from %ir.gep1 + 1, addrspace 1) +; CHECK-NEXT: [[Reg37:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg22]]:vreg_64, 14, 0, implicit $exec :: (load (s8) from %ir.gep1 + 2, addrspace 1) +; CHECK-NEXT: [[Reg38:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg22]]:vreg_64, 15, 0, implicit $exec :: (load (s8) from %ir.gep1 + 3, addrspace 1) +; CHECK-NEXT: [[Reg39:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[Reg21]]:vreg_64, 16, 0, implicit $exec :: (load (s128) from %ir.p3 + 16, align 4, addrspace 1) +; CHECK-NEXT: [[Reg40:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[Reg21]]:vreg_64, 0, 0, implicit $exec :: (load (s128) from %ir.p3, align 4, addrspace 1) +; CHECK-NEXT: [[Reg41:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[Reg40]].sub0:vreg_128, [[Reg34]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_DWORD killed [[Reg21]]:vreg_64, [[Reg41]]:vgpr_32, 0, 0, 
implicit $exec :: (store (s32) into %ir.p3, addrspace 1) +; CHECK-NEXT: [[Reg42:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[Reg41]]:vgpr_32, [[Reg34]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg43:%[0-9]+]]:sreg_32 = SI_IF [[Reg24]]:sreg_32, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.1 +; EMPTY: +; CHECK: bb.1.bb1: +; CHECK-NEXT: ; predecessors: %bb.0 +; CHECK-NEXT: successors: %bb.2(0x80000000); %bb.2(100.00%) +; EMPTY: +; CHECK: [[Reg44:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 100, implicit $exec +; EMPTY: +; CHECK: bb.2.bb2: +; CHECK-NEXT: ; predecessors: %bb.0, %bb.1 +; CHECK-NEXT: successors: %bb.3(0x40000000), %bb.18(0x40000000); %bb.3(50.00%), %bb.18(50.00%) +; EMPTY: +; CHECK: [[Reg45:%[0-9]+]]:vgpr_32 = PHI [[Reg42]]:vgpr_32, %bb.0, [[Reg44]]:vgpr_32, %bb.1 +; CHECK-NEXT: SI_END_CF killed [[Reg43]]:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: [[Reg46:%[0-9]+]]:sreg_32 = SI_IF killed [[Reg27]]:sreg_32, %bb.18, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.3 +; EMPTY: +; CHECK: bb.3.bb4.preheader: +; CHECK-NEXT: ; predecessors: %bb.2 +; CHECK-NEXT: successors: %bb.19(0x80000000); %bb.19(100.00%) +; EMPTY: +; CHECK: [[Reg47:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @array6, target-flags(amdgpu-gotprel32-hi) @array6, implicit-def dead $scc +; CHECK-NEXT: [[Reg48:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[Reg47]]:sreg_64, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) +; CHECK-NEXT: [[Reg49:%[0-9]+]]:vreg_64 = COPY killed [[Reg48]]:sreg_64_xexec +; CHECK-NEXT: [[Reg50:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD killed [[Reg49]]:vreg_64, 28, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array6, i64 28)`) +; CHECK-NEXT: [[Reg51:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @array8, 
target-flags(amdgpu-gotprel32-hi) @array8, implicit-def dead $scc +; CHECK-NEXT: [[Reg52:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[Reg51]]:sreg_64, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) +; CHECK-NEXT: [[Reg53:%[0-9]+]]:vreg_64 = COPY killed [[Reg52]]:sreg_64_xexec +; CHECK-NEXT: [[Reg54:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD killed [[Reg53]]:vreg_64, 20, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array8, i64 20)`) +; CHECK-NEXT: [[Reg55:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 +; CHECK-NEXT: [[Reg56:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; CHECK-NEXT: S_BRANCH %bb.19 +; EMPTY: +; CHECK: bb.4.bb3: +; CHECK-NEXT: ; predecessors: %bb.18 +; CHECK-NEXT: successors: %bb.10(0x40000000), %bb.5(0x40000000); %bb.10(50.00%), %bb.5(50.00%) +; EMPTY: +; CHECK: [[Reg57:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg58:%[0-9]+]]:vgpr_32, 8, killed [[Reg59:%[0-9]+]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg60:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg61:%[0-9]+]]:vgpr_32, 8, killed [[Reg62:%[0-9]+]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg63:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[Reg64:%[0-9]+]]:vgpr_32, %subreg.sub0, [[Reg65:%[0-9]+]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg66:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg67:%[0-9]+]]:vgpr_32, %subreg.sub0, killed [[Reg68:%[0-9]+]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg69:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg60]]:vgpr_32, 16, killed [[Reg57]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg70:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[Reg71:%[0-9]+]]:vgpr_32, 0, 0, implicit $exec :: (load (s8) from %ir.p2, addrspace 3) +; CHECK-NEXT: [[Reg72:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[Reg71]]:vgpr_32, 1, 0, implicit $exec :: (load (s8) from %ir.p2 + 1, addrspace 3) +; CHECK-NEXT: [[Reg73:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[Reg71]]:vgpr_32, 2, 0, implicit $exec :: (load (s8) from %ir.p2 + 2, addrspace 3) +; CHECK-NEXT: 
[[Reg74:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[Reg71]]:vgpr_32, 3, 0, implicit $exec :: (load (s8) from %ir.p2 + 3, addrspace 3) +; CHECK-NEXT: [[Reg75:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 killed [[Reg71]]:vgpr_32, 12, 0, implicit $exec :: (load (s32) from %ir.gep2, align 8, addrspace 3) +; CHECK-NEXT: [[Reg76:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg72]]:vgpr_32, 8, killed [[Reg70]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg77:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg74]]:vgpr_32, 8, killed [[Reg73]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg78:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 [[Reg77]]:vgpr_32, 16, [[Reg76]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg79:%[0-9]+]]:vreg_64, $sgpr_null = V_MAD_U64_U32_e64 [[Reg80:%[0-9]+]]:vgpr_32, [[Reg45]]:vgpr_32, 1900, 0, implicit $exec +; CHECK-NEXT: [[Reg81:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[Reg75]]:vgpr_32, [[Reg79]].sub0:vreg_64, 0, implicit $exec +; CHECK-NEXT: [[Reg82:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg66]]:vreg_64, 20, 0, implicit $exec :: (load (s8) from %ir.p4 + 20, addrspace 1) +; CHECK-NEXT: [[Reg83:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg66]]:vreg_64, 21, 0, implicit $exec :: (load (s8) from %ir.p4 + 21, addrspace 1) +; CHECK-NEXT: [[Reg84:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg83]]:vgpr_32, 8, killed [[Reg82]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg85:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg66]]:vreg_64, 22, 0, implicit $exec :: (load (s8) from %ir.p4 + 22, addrspace 1) +; CHECK-NEXT: [[Reg86:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg66]]:vreg_64, 23, 0, implicit $exec :: (load (s8) from %ir.p4 + 23, addrspace 1) +; CHECK-NEXT: [[Reg87:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg86]]:vgpr_32, 8, killed [[Reg85]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg88:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg66]]:vreg_64, 12, 0, implicit $exec :: (load (s8) from %ir.p4 + 12, addrspace 1) +; CHECK-NEXT: [[Reg89:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE 
[[Reg66]]:vreg_64, 13, 0, implicit $exec :: (load (s8) from %ir.p4 + 13, addrspace 1) +; CHECK-NEXT: [[Reg90:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg89]]:vgpr_32, 8, killed [[Reg88]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg91:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg66]]:vreg_64, 14, 0, implicit $exec :: (load (s8) from %ir.p4 + 14, addrspace 1) +; CHECK-NEXT: [[Reg92:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg66]]:vreg_64, 15, 0, implicit $exec :: (load (s8) from %ir.p4 + 15, addrspace 1) +; CHECK-NEXT: [[Reg93:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg92]]:vgpr_32, 8, killed [[Reg91]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg94:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg66]]:vreg_64, 8, 0, implicit $exec :: (load (s8) from %ir.p4 + 8, addrspace 1) +; CHECK-NEXT: [[Reg95:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg66]]:vreg_64, 9, 0, implicit $exec :: (load (s8) from %ir.p4 + 9, addrspace 1) +; CHECK-NEXT: [[Reg96:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg95]]:vgpr_32, 8, killed [[Reg94]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg97:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg66]]:vreg_64, 10, 0, implicit $exec :: (load (s8) from %ir.p4 + 10, addrspace 1) +; CHECK-NEXT: [[Reg98:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg66]]:vreg_64, 11, 0, implicit $exec :: (load (s8) from %ir.p4 + 11, addrspace 1) +; CHECK-NEXT: [[Reg99:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg98]]:vgpr_32, 8, killed [[Reg97]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg100:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg66]]:vreg_64, 4, 0, implicit $exec :: (load (s8) from %ir.p4 + 4, addrspace 1) +; CHECK-NEXT: [[Reg101:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg66]]:vreg_64, 5, 0, implicit $exec :: (load (s8) from %ir.p4 + 5, addrspace 1) +; CHECK-NEXT: [[Reg102:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg101]]:vgpr_32, 8, killed [[Reg100]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg103:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg66]]:vreg_64, 6, 
0, implicit $exec :: (load (s8) from %ir.p4 + 6, addrspace 1) +; CHECK-NEXT: [[Reg104:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg66]]:vreg_64, 7, 0, implicit $exec :: (load (s8) from %ir.p4 + 7, addrspace 1) +; CHECK-NEXT: [[Reg105:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg104]]:vgpr_32, 8, killed [[Reg103]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg106:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[Reg66]]:vreg_64, 0, 0, implicit $exec :: (load (s16) from %ir.p4, addrspace 1) +; CHECK-NEXT: [[Reg107:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[Reg66]]:vreg_64, 2, 0, implicit $exec :: (load (s16) from %ir.p4 + 2, addrspace 1) +; CHECK-NEXT: [[Reg108:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg107]]:vgpr_32, 16, killed [[Reg106]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg109:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[Reg108]]:vgpr_32, killed [[Reg110:%[0-9]+]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_DWORD [[Reg66]]:vreg_64, killed [[Reg109]]:vgpr_32, 0, 0, implicit $exec :: (store (s32) into %ir.p4, addrspace 1) +; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 +; CHECK-NEXT: [[Reg111:%[0-9]+]]:sreg_32_xexec_hi = COPY $sgpr32 +; CHECK-NEXT: [[Reg112:%[0-9]+]]:sreg_32 = S_ADD_I32 [[Reg111]]:sreg_32_xexec_hi, 1024, implicit-def dead $scc +; CHECK-NEXT: $sgpr32 = COPY killed [[Reg112]]:sreg_32 +; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 +; CHECK-NEXT: [[Reg113:%[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2 [[Reg63]]:vreg_64, 0, 0, implicit $exec :: (load (s64) from %ir.p6, align 4, addrspace 1) +; CHECK-NEXT: [[Reg114:%[0-9]+]]:vgpr_32 = COPY [[Reg113]].sub0:vreg_64 +; CHECK-NEXT: [[Reg115:%[0-9]+]]:vgpr_32 = COPY [[Reg113]].sub1:vreg_64 +; CHECK-NEXT: [[Reg116:%[0-9]+]]:vgpr_32 = nsw V_LSHLREV_B32_e64 2, [[Reg113]].sub0:vreg_64, implicit $exec +; CHECK-NEXT: SCRATCH_STORE_DWORD_SVS [[Reg45]]:vgpr_32, killed [[Reg116]]:vgpr_32, [[Reg111]]:sreg_32_xexec_hi, 
0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.arrayidx11, addrspace 5) +; CHECK-NEXT: [[Reg117:%[0-9]+]]:vgpr_32 = nsw V_LSHLREV_B32_e64 2, killed [[Reg113]].sub1:vreg_64, implicit $exec +; CHECK-NEXT: SCRATCH_STORE_SHORT_SVS killed [[Reg77]]:vgpr_32, [[Reg117]]:vgpr_32, [[Reg111]]:sreg_32_xexec_hi, 2, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %ir.arrayidx33 + 2, addrspace 5) +; CHECK-NEXT: SCRATCH_STORE_SHORT_SVS killed [[Reg76]]:vgpr_32, killed [[Reg117]]:vgpr_32, killed [[Reg111]]:sreg_32_xexec_hi, 0, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %ir.arrayidx33, addrspace 5) +; CHECK-NEXT: [[Reg118:%[0-9]+]]:sreg_32 = S_XOR_B32 [[Reg24]]:sreg_32, [[Reg26]]:sreg_32, implicit-def dead $scc +; CHECK-NEXT: [[Reg119:%[0-9]+]]:sreg_32 = S_XOR_B32 killed [[Reg118]]:sreg_32, -1, implicit-def dead $scc +; CHECK-NEXT: [[Reg120:%[0-9]+]]:sreg_32 = SI_IF [[Reg119]]:sreg_32, %bb.5, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.10 +; EMPTY: +; CHECK: bb.5.Flow18: +; CHECK-NEXT: ; predecessors: %bb.4, %bb.10 +; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.13(0x40000000); %bb.6(50.00%), %bb.13(50.00%) +; EMPTY: +; CHECK: [[Reg121:%[0-9]+]]:vgpr_32 = PHI undef [[Reg122:%[0-9]+]]:vgpr_32, %bb.4, [[Reg123:%[0-9]+]]:vgpr_32, %bb.10 +; CHECK-NEXT: [[Reg124:%[0-9]+]]:vgpr_32 = PHI [[Reg69]]:vgpr_32, %bb.4, undef [[Reg125:%[0-9]+]]:vgpr_32, %bb.10 +; CHECK-NEXT: [[Reg126:%[0-9]+]]:vgpr_32 = PHI [[Reg81]]:vgpr_32, %bb.4, undef [[Reg127:%[0-9]+]]:vgpr_32, %bb.10 +; CHECK-NEXT: [[Reg128:%[0-9]+]]:vreg_256 = REG_SEQUENCE undef [[Reg129:%[0-9]+]].sub0:vreg_128, %subreg.sub0, undef [[Reg129]].sub1:vreg_128, %subreg.sub1, undef [[Reg129]].sub2:vreg_128, %subreg.sub2, [[Reg129]].sub3:vreg_128, %subreg.sub3, undef [[Reg130:%[0-9]+]].sub0:vreg_128, %subreg.sub4, [[Reg130]].sub1:vreg_128, %subreg.sub5, undef [[Reg130]].sub2:vreg_128, %subreg.sub6, undef [[Reg130]].sub3:vreg_128, %subreg.sub7 +; 
CHECK-NEXT: [[Reg131:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg87]]:vgpr_32, 16, killed [[Reg84]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg132:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg93]]:vgpr_32, 16, killed [[Reg90]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg133:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg99]]:vgpr_32, 16, killed [[Reg96]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg134:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg105]]:vgpr_32, 16, killed [[Reg102]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg135:%[0-9]+]]:sreg_32 = SI_ELSE killed [[Reg120]]:sreg_32, %bb.13, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.6 +; EMPTY: +; CHECK: bb.6.bb6: +; CHECK-NEXT: ; predecessors: %bb.5 +; CHECK-NEXT: successors: %bb.9(0x40000000), %bb.7(0x40000000); %bb.9(50.00%), %bb.7(50.00%) +; EMPTY: +; CHECK: [[Reg136:%[0-9]+]]:sreg_32 = S_AND_B32 killed [[Reg24]]:sreg_32, killed [[Reg26]]:sreg_32, implicit-def dead $scc +; CHECK-NEXT: [[Reg137:%[0-9]+]]:sreg_32 = S_XOR_B32 killed [[Reg136]]:sreg_32, -1, implicit-def dead $scc +; CHECK-NEXT: [[Reg138:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @array2, target-flags(amdgpu-gotprel32-hi) @array2, implicit-def dead $scc +; CHECK-NEXT: [[Reg139:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[Reg138]]:sreg_64, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) +; CHECK-NEXT: [[Reg140:%[0-9]+]]:vreg_64 = COPY killed [[Reg139]]:sreg_64_xexec +; CHECK-NEXT: [[Reg141:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD killed [[Reg140]]:vreg_64, 20, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array2, i64 20)`) +; CHECK-NEXT: [[Reg142:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @array4, target-flags(amdgpu-gotprel32-hi) @array4, implicit-def dead $scc +; CHECK-NEXT: [[Reg143:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed 
[[Reg142]]:sreg_64, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) +; CHECK-NEXT: [[Reg144:%[0-9]+]]:vreg_64 = COPY [[Reg143]]:sreg_64_xexec +; CHECK-NEXT: FLAT_STORE_DWORD killed [[Reg144]]:vreg_64, killed [[Reg141]]:vgpr_32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr getelementptr inbounds nuw (i8, ptr @array4, i64 4)`) +; CHECK-NEXT: [[Reg145:%[0-9]+]]:sreg_32 = SI_IF killed [[Reg137]]:sreg_32, %bb.7, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.9 +; EMPTY: +; CHECK: bb.7.Flow16: +; CHECK-NEXT: ; predecessors: %bb.6, %bb.9 +; CHECK-NEXT: successors: %bb.8(0x40000000), %bb.11(0x40000000); %bb.8(50.00%), %bb.11(50.00%) +; EMPTY: +; CHECK: [[Reg146:%[0-9]+]]:vgpr_32 = PHI undef [[Reg147:%[0-9]+]]:vgpr_32, %bb.6, [[Reg148:%[0-9]+]]:vgpr_32, %bb.9 +; CHECK-NEXT: [[Reg149:%[0-9]+]]:vgpr_32 = PHI [[Reg124]]:vgpr_32, %bb.6, undef [[Reg150:%[0-9]+]]:vgpr_32, %bb.9 +; CHECK-NEXT: [[Reg151:%[0-9]+]]:vgpr_32 = PHI [[Reg126]]:vgpr_32, %bb.6, undef [[Reg152:%[0-9]+]]:vgpr_32, %bb.9 +; CHECK-NEXT: [[Reg153:%[0-9]+]]:sreg_32 = SI_ELSE killed [[Reg145]]:sreg_32, %bb.11, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.8 +; EMPTY: +; CHECK: bb.8.bb8: +; CHECK-NEXT: ; predecessors: %bb.7 +; CHECK-NEXT: successors: %bb.11(0x80000000); %bb.11(100.00%) +; EMPTY: +; CHECK: [[Reg154:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[Reg149]]:vgpr_32, killed [[Reg151]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg155:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @array2, target-flags(amdgpu-gotprel32-hi) @array2, implicit-def dead $scc +; CHECK-NEXT: [[Reg156:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[Reg155]]:sreg_64, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) +; CHECK-NEXT: [[Reg157:%[0-9]+]]:vreg_64 = COPY killed [[Reg156]]:sreg_64_xexec +; CHECK-NEXT: [[Reg158:%[0-9]+]]:vgpr_32 = 
FLAT_LOAD_DWORD killed [[Reg157]]:vreg_64, 28, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array2, i64 28)`) +; CHECK-NEXT: [[Reg159:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @array3, target-flags(amdgpu-gotprel32-hi) @array3, implicit-def dead $scc +; CHECK-NEXT: [[Reg160:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[Reg159]]:sreg_64, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) +; CHECK-NEXT: [[Reg161:%[0-9]+]]:vreg_64 = COPY killed [[Reg160]]:sreg_64_xexec +; CHECK-NEXT: FLAT_STORE_DWORD killed [[Reg161]]:vreg_64, killed [[Reg158]]:vgpr_32, 68, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr getelementptr inbounds nuw (i8, ptr @array3, i64 68)`) +; CHECK-NEXT: S_BRANCH %bb.11 +; EMPTY: +; CHECK: bb.9.bb9: +; CHECK-NEXT: ; predecessors: %bb.6 +; CHECK-NEXT: successors: %bb.7(0x80000000); %bb.7(100.00%) +; EMPTY: +; CHECK: [[Reg148]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg124]]:vgpr_32, killed [[Reg126]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg162:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @array5, target-flags(amdgpu-gotprel32-hi) @array5, implicit-def dead $scc +; CHECK-NEXT: [[Reg163:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[Reg162]]:sreg_64, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) +; CHECK-NEXT: [[Reg164:%[0-9]+]]:vreg_64 = COPY killed [[Reg163]]:sreg_64_xexec +; CHECK-NEXT: [[Reg165:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD killed [[Reg164]]:vreg_64, 20, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array5, i64 20)`) +; CHECK-NEXT: [[Reg166:%[0-9]+]]:vreg_64 = COPY killed [[Reg143]]:sreg_64_xexec +; CHECK-NEXT: FLAT_STORE_DWORD killed [[Reg166]]:vreg_64, killed [[Reg165]]:vgpr_32, 60, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr getelementptr inbounds nuw (i8, ptr @array4, i64 60)`) 
+; CHECK-NEXT: S_BRANCH %bb.7 +; EMPTY: +; CHECK: bb.10.bb7: +; CHECK-NEXT: ; predecessors: %bb.4 +; CHECK-NEXT: successors: %bb.5(0x80000000); %bb.5(100.00%) +; EMPTY: +; CHECK: [[Reg167:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[Reg75]]:vgpr_32, killed [[Reg81]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg168:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg167]]:vgpr_32, [[Reg78]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg169:%[0-9]+]]:vgpr_32 = V_CVT_F32_U32_e64 [[Reg80]]:vgpr_32, 0, 0, implicit $mode, implicit $exec +; CHECK-NEXT: [[Reg170:%[0-9]+]]:vgpr_32 = nofpexcept V_RCP_IFLAG_F32_e64 0, killed [[Reg169]]:vgpr_32, 0, 0, implicit $mode, implicit $exec +; CHECK-NEXT: [[Reg171:%[0-9]+]]:vgpr_32 = nnan ninf nsz arcp contract afn reassoc nofpexcept V_MUL_F32_e64 0, 1333788670, 0, killed [[Reg170]]:vgpr_32, 0, 0, implicit $mode, implicit $exec +; CHECK-NEXT: [[Reg172:%[0-9]+]]:vgpr_32 = nofpexcept V_CVT_U32_F32_e64 0, killed [[Reg171]]:vgpr_32, 0, 0, implicit $mode, implicit $exec +; CHECK-NEXT: [[Reg173:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 0, [[Reg80]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg174:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg173]]:vgpr_32, [[Reg172]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg175:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[Reg172]]:vgpr_32, killed [[Reg174]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg176:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[Reg172]]:vgpr_32, killed [[Reg175]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg177:%[0-9]+]]:vgpr_32 = V_MUL_HI_U32_e64 [[Reg168]]:vgpr_32, killed [[Reg176]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg178:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[Reg177]]:vgpr_32, [[Reg80]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg179:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 killed [[Reg168]]:vgpr_32, killed [[Reg178]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg180:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_GE_U32_e64 [[Reg179]]:vgpr_32, [[Reg80]]:vgpr_32, implicit $exec +; CHECK-NEXT: 
[[Reg181:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 1, [[Reg177]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg182:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, killed [[Reg177]]:vgpr_32, 0, killed [[Reg181]]:vgpr_32, [[Reg180]]:sreg_32_xm0_xexec, implicit $exec +; CHECK-NEXT: [[Reg183:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[Reg179]]:vgpr_32, [[Reg80]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg184:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, killed [[Reg179]]:vgpr_32, 0, killed [[Reg183]]:vgpr_32, killed [[Reg180]]:sreg_32_xm0_xexec, implicit $exec +; CHECK-NEXT: [[Reg185:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_GE_U32_e64 killed [[Reg184]]:vgpr_32, killed [[Reg80]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg186:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 1, [[Reg182]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg187:%[0-9]+]]:vgpr_32 = V_CNDMASK_B32_e64 0, killed [[Reg182]]:vgpr_32, 0, killed [[Reg186]]:vgpr_32, killed [[Reg185]]:sreg_32_xm0_xexec, implicit $exec +; CHECK-NEXT: [[Reg123]]:vgpr_32 = V_ADD_U32_e64 killed [[Reg187]]:vgpr_32, killed [[Reg69]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg188:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @array3, target-flags(amdgpu-gotprel32-hi) @array3, implicit-def dead $scc +; CHECK-NEXT: [[Reg189:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[Reg188]]:sreg_64, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) +; CHECK-NEXT: [[Reg190:%[0-9]+]]:vreg_64 = COPY killed [[Reg189]]:sreg_64_xexec +; CHECK-NEXT: [[Reg191:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD killed [[Reg190]]:vreg_64, 84, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array3, i64 84)`) +; CHECK-NEXT: [[Reg192:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @array4, target-flags(amdgpu-gotprel32-hi) @array4, implicit-def dead $scc +; CHECK-NEXT: [[Reg193:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[Reg192]]:sreg_64, 0, 0 :: (dereferenceable 
invariant load (s64) from got, addrspace 4) +; CHECK-NEXT: [[Reg194:%[0-9]+]]:vreg_64 = COPY killed [[Reg193]]:sreg_64_xexec +; CHECK-NEXT: FLAT_STORE_DWORD killed [[Reg194]]:vreg_64, killed [[Reg191]]:vgpr_32, 60, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr getelementptr inbounds nuw (i8, ptr @array4, i64 60)`) +; CHECK-NEXT: S_BRANCH %bb.5 +; EMPTY: +; CHECK: bb.11.Flow17: +; CHECK-NEXT: ; predecessors: %bb.7, %bb.8 +; CHECK-NEXT: successors: %bb.13(0x80000000); %bb.13(100.00%) +; EMPTY: +; CHECK: [[Reg195:%[0-9]+]]:vgpr_32 = PHI [[Reg146]]:vgpr_32, %bb.7, [[Reg154]]:vgpr_32, %bb.8 +; CHECK-NEXT: SI_END_CF killed [[Reg153]]:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.13 +; EMPTY: +; CHECK: bb.12.Flow20: +; CHECK-NEXT: ; predecessors: %bb.18, %bb.17 +; CHECK-NEXT: successors: %bb.21(0x80000000); %bb.21(100.00%) +; EMPTY: +; CHECK: [[Reg196:%[0-9]+]]:vgpr_32 = PHI [[Reg197:%[0-9]+]]:vgpr_32, %bb.18, [[Reg198:%[0-9]+]]:vgpr_32, %bb.17 +; CHECK-NEXT: [[Reg199:%[0-9]+]]:vgpr_32 = PHI [[Reg200:%[0-9]+]]:vgpr_32, %bb.18, [[Reg198]]:vgpr_32, %bb.17 +; CHECK-NEXT: [[Reg201:%[0-9]+]]:vgpr_32 = PHI [[Reg202:%[0-9]+]]:vgpr_32, %bb.18, [[Reg203:%[0-9]+]]:vgpr_32, %bb.17 +; CHECK-NEXT: [[Reg204:%[0-9]+]]:vgpr_32 = PHI [[Reg205:%[0-9]+]]:vgpr_32, %bb.18, [[Reg206:%[0-9]+]]:vgpr_32, %bb.17 +; CHECK-NEXT: [[Reg207:%[0-9]+]]:vgpr_32 = PHI [[Reg208:%[0-9]+]]:vgpr_32, %bb.18, [[Reg209:%[0-9]+]]:vgpr_32, %bb.17 +; CHECK-NEXT: [[Reg210:%[0-9]+]]:vgpr_32 = PHI [[Reg211:%[0-9]+]]:vgpr_32, %bb.18, [[Reg212:%[0-9]+]]:vgpr_32, %bb.17 +; CHECK-NEXT: [[Reg213:%[0-9]+]]:vgpr_32 = PHI [[Reg214:%[0-9]+]]:vgpr_32, %bb.18, [[Reg198]]:vgpr_32, %bb.17 +; CHECK-NEXT: [[Reg215:%[0-9]+]]:vgpr_32 = PHI [[Reg216:%[0-9]+]]:vgpr_32, %bb.18, [[Reg217:%[0-9]+]]:vgpr_32, %bb.17 +; CHECK-NEXT: SI_END_CF killed [[Reg218:%[0-9]+]]:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: 
[[Reg219:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg18]]:vgpr_32, %subreg.sub0, killed [[Reg19]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg220:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg16]]:vgpr_32, %subreg.sub0, killed [[Reg17]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: S_BRANCH %bb.21 +; EMPTY: +; CHECK: bb.13.bb10: +; CHECK-NEXT: ; predecessors: %bb.5, %bb.11 +; CHECK-NEXT: successors: %bb.16(0x40000000), %bb.14(0x40000000); %bb.16(50.00%), %bb.14(50.00%) +; EMPTY: +; CHECK: [[Reg221:%[0-9]+]]:vgpr_32 = PHI [[Reg121]]:vgpr_32, %bb.5, [[Reg195]]:vgpr_32, %bb.11 +; CHECK-NEXT: SI_END_CF killed [[Reg135]]:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: [[Reg222:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 100, killed [[Reg78]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg223:%[0-9]+]]:vgpr_32 = COPY [[Reg129]].sub1:vreg_128 +; CHECK-NEXT: [[Reg224:%[0-9]+]]:vgpr_32 = COPY [[Reg129]].sub2:vreg_128 +; CHECK-NEXT: [[Reg225:%[0-9]+]]:vgpr_32 = COPY [[Reg130]].sub2:vreg_128 +; CHECK-NEXT: [[Reg226:%[0-9]+]]:vgpr_32 = COPY [[Reg130]].sub3:vreg_128 +; CHECK-NEXT: [[Reg227:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 [[Reg129]].sub1:vreg_128, killed [[Reg130]].sub3:vreg_128, [[Reg130]].sub2:vreg_128, implicit $exec +; CHECK-NEXT: [[Reg228:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec +; CHECK-NEXT: [[Reg229:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg129]].sub2:vreg_128, %subreg.sub0, [[Reg228]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg230:%[0-9]+]]:vreg_64 = nsw V_LSHLREV_B64_pseudo_e64 2, killed [[Reg229]]:vreg_64, implicit $exec +; CHECK-NEXT: [[Reg231:%[0-9]+]]:vgpr_32, [[Reg232:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[Reg233:%[0-9]+]].sub0:vreg_64, [[Reg230]].sub0:vreg_64, 0, implicit $exec +; CHECK-NEXT: [[Reg234:%[0-9]+]]:vgpr_32, dead $sgpr_null = V_ADDC_U32_e64 killed [[Reg233]].sub1:vreg_64, killed [[Reg230]].sub1:vreg_64, killed [[Reg232]]:sreg_32_xm0_xexec, 0, implicit $exec +; CHECK-NEXT: [[Reg235:%[0-9]+]]:vreg_64 = 
REG_SEQUENCE killed [[Reg231]]:vgpr_32, %subreg.sub0, killed [[Reg234]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: GLOBAL_STORE_DWORD killed [[Reg235]]:vreg_64, killed [[Reg227]]:vgpr_32, 0, 0, implicit $exec :: (store (s32) into %ir.arrayidx1, addrspace 1) +; CHECK-NEXT: [[Reg236:%[0-9]+]]:sreg_32 = SI_IF killed [[Reg119]]:sreg_32, %bb.14, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.16 +; EMPTY: +; CHECK: bb.14.Flow: +; CHECK-NEXT: ; predecessors: %bb.13, %bb.16 +; CHECK-NEXT: successors: %bb.15(0x40000000), %bb.17(0x40000000); %bb.15(50.00%), %bb.17(50.00%) +; EMPTY: +; CHECK: [[Reg237:%[0-9]+]]:vgpr_32 = PHI undef [[Reg238:%[0-9]+]]:vgpr_32, %bb.13, [[Reg239:%[0-9]+]]:vgpr_32, %bb.16 +; CHECK-NEXT: [[Reg240:%[0-9]+]]:vgpr_32 = PHI undef [[Reg238]]:vgpr_32, %bb.13, [[Reg241:%[0-9]+]]:vgpr_32, %bb.16 +; CHECK-NEXT: [[Reg242:%[0-9]+]]:vgpr_32 = PHI undef [[Reg238]]:vgpr_32, %bb.13, [[Reg243:%[0-9]+]]:vgpr_32, %bb.16 +; CHECK-NEXT: [[Reg244:%[0-9]+]]:vgpr_32 = PHI [[Reg132]]:vgpr_32, %bb.13, undef [[Reg245:%[0-9]+]]:vgpr_32, %bb.16 +; CHECK-NEXT: [[Reg246:%[0-9]+]]:vgpr_32 = PHI [[Reg131]]:vgpr_32, %bb.13, undef [[Reg247:%[0-9]+]]:vgpr_32, %bb.16 +; CHECK-NEXT: [[Reg248:%[0-9]+]]:vgpr_32 = PHI [[Reg249:%[0-9]+]]:vgpr_32, %bb.13, undef [[Reg250:%[0-9]+]]:vgpr_32, %bb.16 +; CHECK-NEXT: [[Reg251:%[0-9]+]]:vgpr_32 = PHI [[Reg252:%[0-9]+]]:vgpr_32, %bb.13, undef [[Reg253:%[0-9]+]]:vgpr_32, %bb.16 +; CHECK-NEXT: [[Reg254:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg255:%[0-9]+]]:vgpr_32, %subreg.sub0, killed [[Reg256:%[0-9]+]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg257:%[0-9]+]]:sreg_32 = SI_ELSE killed [[Reg236]]:sreg_32, %bb.17, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.15 +; EMPTY: +; CHECK: bb.15.bb11: +; CHECK-NEXT: ; predecessors: %bb.14 +; CHECK-NEXT: successors: %bb.17(0x80000000); %bb.17(100.00%) +; EMPTY: +; CHECK: [[Reg258:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed 
[[Reg244]]:vgpr_32, [[Reg221]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg259:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec +; CHECK-NEXT: [[Reg260:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg246]]:vgpr_32, %subreg.sub0, killed [[Reg259]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg261:%[0-9]+]]:vreg_64 = nsw V_LSHLREV_B64_pseudo_e64 2, killed [[Reg260]]:vreg_64, implicit $exec +; CHECK-NEXT: [[Reg262:%[0-9]+]]:vgpr_32, [[Reg263:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 killed [[Reg248]]:vgpr_32, [[Reg261]].sub0:vreg_64, 0, implicit $exec +; CHECK-NEXT: [[Reg264:%[0-9]+]]:vgpr_32, dead $sgpr_null = V_ADDC_U32_e64 killed [[Reg251]]:vgpr_32, killed [[Reg261]].sub1:vreg_64, killed [[Reg263]]:sreg_32_xm0_xexec, 0, implicit $exec +; CHECK-NEXT: [[Reg265:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg262]]:vgpr_32, %subreg.sub0, killed [[Reg264]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: GLOBAL_STORE_DWORD killed [[Reg265]]:vreg_64, [[Reg258]]:vgpr_32, 0, 0, implicit $exec :: (store (s32) into %ir.arrayidx2, align 8, addrspace 1) +; CHECK-NEXT: S_BRANCH %bb.17 +; EMPTY: +; CHECK: bb.16.bb12: +; CHECK-NEXT: ; predecessors: %bb.13 +; CHECK-NEXT: successors: %bb.14(0x80000000); %bb.14(100.00%) +; EMPTY: +; CHECK: [[Reg266:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 [[Reg222]]:vgpr_32, [[Reg221]]:vgpr_32, [[Reg128]].sub3:vreg_256, implicit $exec +; CHECK-NEXT: [[Reg267:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg128]].sub5:vreg_256, %subreg.sub0, [[Reg228]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg268:%[0-9]+]]:vreg_64 = nsw V_LSHLREV_B64_pseudo_e64 2, killed [[Reg267]]:vreg_64, implicit $exec +; CHECK-NEXT: [[Reg269:%[0-9]+]]:vgpr_32, [[Reg270:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 killed [[Reg249]]:vgpr_32, [[Reg268]].sub0:vreg_64, 0, implicit $exec +; CHECK-NEXT: [[Reg271:%[0-9]+]]:vgpr_32, dead $sgpr_null = V_ADDC_U32_e64 killed [[Reg252]]:vgpr_32, killed [[Reg268]].sub1:vreg_64, killed [[Reg270]]:sreg_32_xm0_xexec, 0, implicit $exec +; CHECK-NEXT: 
[[Reg272:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg269]]:vgpr_32, %subreg.sub0, killed [[Reg271]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: GLOBAL_STORE_SHORT_D16_HI [[Reg272]]:vreg_64, [[Reg266]]:vgpr_32, 2, 0, implicit $exec :: (store (s16) into %ir.arrayidx3 + 2, addrspace 1) +; CHECK-NEXT: GLOBAL_STORE_SHORT killed [[Reg272]]:vreg_64, [[Reg266]]:vgpr_32, 0, 0, implicit $exec :: (store (s16) into %ir.arrayidx3, addrspace 1) +; CHECK-NEXT: [[Reg273:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[Reg134]]:vgpr_32, [[Reg266]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg274:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg133]]:vgpr_32, %subreg.sub0, killed [[Reg228]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg275:%[0-9]+]]:vreg_64 = nsw V_LSHLREV_B64_pseudo_e64 2, killed [[Reg274]]:vreg_64, implicit $exec +; CHECK-NEXT: [[Reg276:%[0-9]+]]:vgpr_32, [[Reg277:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 killed [[Reg64]]:vgpr_32, [[Reg275]].sub0:vreg_64, 0, implicit $exec +; CHECK-NEXT: [[Reg278:%[0-9]+]]:vgpr_32, dead $sgpr_null = V_ADDC_U32_e64 killed [[Reg65]]:vgpr_32, killed [[Reg275]].sub1:vreg_64, killed [[Reg277]]:sreg_32_xm0_xexec, 0, implicit $exec +; CHECK-NEXT: [[Reg279:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg276]]:vgpr_32, %subreg.sub0, killed [[Reg278]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: GLOBAL_STORE_SHORT_D16_HI [[Reg279]]:vreg_64, [[Reg273]]:vgpr_32, 2, 0, implicit $exec :: (store (s16) into %ir.arrayidx5 + 2, addrspace 1) +; CHECK-NEXT: GLOBAL_STORE_SHORT killed [[Reg279]]:vreg_64, killed [[Reg273]]:vgpr_32, 0, 0, implicit $exec :: (store (s16) into %ir.arrayidx5, addrspace 1) +; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $scc, implicit-def $sgpr32, implicit $sgpr32 +; CHECK-NEXT: [[Reg280:%[0-9]+]]:sreg_32_xexec_hi = COPY $sgpr32 +; CHECK-NEXT: [[Reg281:%[0-9]+]]:sreg_32 = S_ADD_I32 [[Reg280]]:sreg_32_xexec_hi, 1024, implicit-def dead $scc +; CHECK-NEXT: $sgpr32 = COPY killed [[Reg281]]:sreg_32 +; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def 
dead $scc, implicit-def $sgpr32, implicit $sgpr32 +; CHECK-NEXT: [[Reg282:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD killed [[Reg63]]:vreg_64, 0, 0, implicit $exec :: (load (s32) from %ir.p6, addrspace 1) +; CHECK-NEXT: [[Reg283:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 2, killed [[Reg282]]:vgpr_32, implicit $exec +; CHECK-NEXT: SCRATCH_STORE_DWORD_SVS killed [[Reg266]]:vgpr_32, killed [[Reg283]]:vgpr_32, killed [[Reg280]]:sreg_32_xexec_hi, 40, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %ir.arrayidx1111, addrspace 5) +; CHECK-NEXT: GLOBAL_STORE_DWORD killed [[Reg66]]:vreg_64, [[Reg79]].sub0:vreg_64, 4, 0, implicit $exec :: (store (s32) into %ir.arrayidx444, addrspace 1) +; CHECK-NEXT: [[Reg284:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @array4, target-flags(amdgpu-gotprel32-hi) @array4, implicit-def dead $scc +; CHECK-NEXT: [[Reg285:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[Reg284]]:sreg_64, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) +; CHECK-NEXT: [[Reg286:%[0-9]+]]:vreg_64 = COPY killed [[Reg285]]:sreg_64_xexec +; CHECK-NEXT: [[Reg241]]:vgpr_32 = FLAT_LOAD_DWORD killed [[Reg286]]:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (dereferenceable load (s32) from @array4) +; CHECK-NEXT: [[Reg287:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @array2, target-flags(amdgpu-gotprel32-hi) @array2, implicit-def dead $scc +; CHECK-NEXT: [[Reg288:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[Reg287]]:sreg_64, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) +; CHECK-NEXT: [[Reg289:%[0-9]+]]:vreg_64 = COPY killed [[Reg288]]:sreg_64_xexec +; CHECK-NEXT: [[Reg290:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD killed [[Reg289]]:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (dereferenceable load (s32) from @array2) +; CHECK-NEXT: [[Reg291:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @array3, 
target-flags(amdgpu-gotprel32-hi) @array3, implicit-def dead $scc +; CHECK-NEXT: [[Reg292:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[Reg291]]:sreg_64, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) +; CHECK-NEXT: [[Reg293:%[0-9]+]]:vreg_64 = COPY killed [[Reg292]]:sreg_64_xexec +; CHECK-NEXT: [[Reg294:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD killed [[Reg293]]:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (dereferenceable load (s32) from @array3) +; CHECK-NEXT: [[Reg295:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @array5, target-flags(amdgpu-gotprel32-hi) @array5, implicit-def dead $scc +; CHECK-NEXT: [[Reg296:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[Reg295]]:sreg_64, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) +; CHECK-NEXT: [[Reg297:%[0-9]+]]:vreg_64 = COPY killed [[Reg296]]:sreg_64_xexec +; CHECK-NEXT: [[Reg239]]:vgpr_32 = FLAT_LOAD_DWORD killed [[Reg297]]:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (dereferenceable load (s32) from @array5) +; CHECK-NEXT: [[Reg298:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg294]]:vgpr_32, [[Reg239]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg299:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg298]]:vgpr_32, %subreg.sub0, undef [[Reg300:%[0-9]+]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg301:%[0-9]+]]:vreg_64, $sgpr_null = V_MAD_U64_U32_e64 [[Reg241]]:vgpr_32, killed [[Reg290]]:vgpr_32, killed [[Reg299]]:vreg_64, 0, implicit $exec +; CHECK-NEXT: [[Reg243]]:vgpr_32 = COPY killed [[Reg301]].sub0:vreg_64 +; CHECK-NEXT: S_BRANCH %bb.14 +; EMPTY: +; CHECK: bb.17.bb13: +; CHECK-NEXT: ; predecessors: %bb.14, %bb.15 +; CHECK-NEXT: successors: %bb.12(0x80000000); %bb.12(100.00%) +; EMPTY: +; CHECK: [[Reg302:%[0-9]+]]:vgpr_32 = PHI [[Reg242]]:vgpr_32, %bb.14, [[Reg258]]:vgpr_32, %bb.15 +; CHECK-NEXT: [[Reg303:%[0-9]+]]:vgpr_32 = PHI [[Reg240]]:vgpr_32, %bb.14, [[Reg258]]:vgpr_32, %bb.15 +; CHECK-NEXT: 
[[Reg304:%[0-9]+]]:vgpr_32 = PHI [[Reg115]]:vgpr_32, %bb.14, [[Reg258]]:vgpr_32, %bb.15 +; CHECK-NEXT: [[Reg305:%[0-9]+]]:vgpr_32 = PHI [[Reg114]]:vgpr_32, %bb.14, [[Reg258]]:vgpr_32, %bb.15 +; CHECK-NEXT: [[Reg306:%[0-9]+]]:vgpr_32 = PHI [[Reg237]]:vgpr_32, %bb.14, [[Reg258]]:vgpr_32, %bb.15 +; CHECK-NEXT: [[Reg307:%[0-9]+]]:vgpr_32 = PHI [[Reg223]]:vgpr_32, %bb.14, [[Reg258]]:vgpr_32, %bb.15 +; CHECK-NEXT: [[Reg308:%[0-9]+]]:vgpr_32 = PHI [[Reg224]]:vgpr_32, %bb.14, [[Reg258]]:vgpr_32, %bb.15 +; CHECK-NEXT: [[Reg309:%[0-9]+]]:vgpr_32 = PHI [[Reg225]]:vgpr_32, %bb.14, [[Reg258]]:vgpr_32, %bb.15 +; CHECK-NEXT: [[Reg310:%[0-9]+]]:vgpr_32 = PHI [[Reg226]]:vgpr_32, %bb.14, [[Reg258]]:vgpr_32, %bb.15 +; CHECK-NEXT: [[Reg311:%[0-9]+]]:vgpr_32 = PHI [[Reg108]]:vgpr_32, %bb.14, [[Reg258]]:vgpr_32, %bb.15 +; CHECK-NEXT: SI_END_CF killed [[Reg257]]:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: [[Reg217]]:vgpr_32 = V_ADD_U32_e64 killed [[Reg302]]:vgpr_32, killed [[Reg221]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg209]]:vgpr_32 = V_SUB_U32_e64 [[Reg45]]:vgpr_32, killed [[Reg312:%[0-9]+]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg313:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[Reg217]]:vgpr_32, [[Reg209]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg198]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg313]]:vgpr_32, killed [[Reg222]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg212]]:vgpr_32 = V_ADD_U32_e64 [[Reg198]]:vgpr_32, killed [[Reg79]].sub0:vreg_64, 0, implicit $exec +; CHECK-NEXT: [[Reg314:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 [[Reg212]]:vgpr_32, killed [[Reg75]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg315:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[Reg314]]:vgpr_32, killed [[Reg303]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg316:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg315]]:vgpr_32, killed [[Reg304]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg206]]:vgpr_32 = V_SUB_U32_e64 killed [[Reg316]]:vgpr_32, killed 
[[Reg305]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg317:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg307]]:vgpr_32, %subreg.sub0, undef [[Reg318:%[0-9]+]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg319:%[0-9]+]]:vreg_64, $sgpr_null = V_MAD_U64_U32_e64 [[Reg206]]:vgpr_32, killed [[Reg306]]:vgpr_32, killed [[Reg317]]:vreg_64, 0, implicit $exec +; CHECK-NEXT: [[Reg320:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 killed [[Reg319]].sub0:vreg_64, killed [[Reg308]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg321:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 killed [[Reg320]]:vgpr_32, killed [[Reg309]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg322:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[Reg321]]:vgpr_32, killed [[Reg310]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg203]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg322]]:vgpr_32, killed [[Reg311]]:vgpr_32, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_SHORT_D16_HI [[Reg254]]:vreg_64, [[Reg203]]:vgpr_32, 2, 0, implicit $exec :: (store (s16) into %ir.p7 + 2, addrspace 1) +; CHECK-NEXT: GLOBAL_STORE_SHORT killed [[Reg254]]:vreg_64, [[Reg203]]:vgpr_32, 0, 0, implicit $exec :: (store (s16) into %ir.p7, addrspace 1) +; CHECK-NEXT: S_BRANCH %bb.12 +; EMPTY: +; CHECK: bb.18.Flow19: +; CHECK-NEXT: ; predecessors: %bb.2, %bb.20 +; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.12(0x40000000); %bb.4(50.00%), %bb.12(50.00%) +; EMPTY: +; CHECK: [[Reg197]]:vgpr_32 = PHI undef [[Reg323:%[0-9]+]]:vgpr_32, %bb.2, [[Reg324:%[0-9]+]]:vgpr_32, %bb.20 +; CHECK-NEXT: [[Reg200]]:vgpr_32 = PHI undef [[Reg323]]:vgpr_32, %bb.2, [[Reg325:%[0-9]+]]:vgpr_32, %bb.20 +; CHECK-NEXT: [[Reg202]]:vgpr_32 = PHI undef [[Reg323]]:vgpr_32, %bb.2, [[Reg326:%[0-9]+]]:vgpr_32, %bb.20 +; CHECK-NEXT: [[Reg205]]:vgpr_32 = PHI undef [[Reg323]]:vgpr_32, %bb.2, [[Reg327:%[0-9]+]]:vgpr_32, %bb.20 +; CHECK-NEXT: [[Reg208]]:vgpr_32 = PHI undef [[Reg323]]:vgpr_32, %bb.2, [[Reg328:%[0-9]+]]:vgpr_32, %bb.20 +; CHECK-NEXT: [[Reg211]]:vgpr_32 = PHI undef [[Reg323]]:vgpr_32, %bb.2, 
[[Reg329:%[0-9]+]]:vgpr_32, %bb.20 +; CHECK-NEXT: [[Reg214]]:vgpr_32 = PHI undef [[Reg323]]:vgpr_32, %bb.2, [[Reg330:%[0-9]+]]:vgpr_32, %bb.20 +; CHECK-NEXT: [[Reg216]]:vgpr_32 = PHI undef [[Reg323]]:vgpr_32, %bb.2, [[Reg331:%[0-9]+]]:vgpr_32, %bb.20 +; CHECK-NEXT: [[Reg80]]:vgpr_32 = PHI [[Reg34]]:vgpr_32, %bb.2, undef [[Reg332:%[0-9]+]]:vgpr_32, %bb.20 +; CHECK-NEXT: [[Reg110]]:vgpr_32 = PHI [[Reg41]]:vgpr_32, %bb.2, undef [[Reg333:%[0-9]+]]:vgpr_32, %bb.20 +; CHECK-NEXT: [[Reg312]]:vgpr_32 = PHI [[Reg42]]:vgpr_32, %bb.2, undef [[Reg334:%[0-9]+]]:vgpr_32, %bb.20 +; CHECK-NEXT: [[Reg71]]:vgpr_32 = PHI [[Reg3]]:vgpr_32, %bb.2, undef [[Reg335:%[0-9]+]]:vgpr_32, %bb.20 +; CHECK-NEXT: [[Reg67]]:vgpr_32 = PHI [[Reg8]]:vgpr_32, %bb.2, undef [[Reg336:%[0-9]+]]:vgpr_32, %bb.20 +; CHECK-NEXT: [[Reg68]]:vgpr_32 = PHI [[Reg9]]:vgpr_32, %bb.2, undef [[Reg337:%[0-9]+]]:vgpr_32, %bb.20 +; CHECK-NEXT: [[Reg249]]:vgpr_32 = PHI [[Reg10]]:vgpr_32, %bb.2, undef [[Reg338:%[0-9]+]]:vgpr_32, %bb.20 +; CHECK-NEXT: [[Reg252]]:vgpr_32 = PHI [[Reg11]]:vgpr_32, %bb.2, undef [[Reg339:%[0-9]+]]:vgpr_32, %bb.20 +; CHECK-NEXT: [[Reg64]]:vgpr_32 = PHI [[Reg12]]:vgpr_32, %bb.2, undef [[Reg340:%[0-9]+]]:vgpr_32, %bb.20 +; CHECK-NEXT: [[Reg65]]:vgpr_32 = PHI [[Reg13]]:vgpr_32, %bb.2, undef [[Reg341:%[0-9]+]]:vgpr_32, %bb.20 +; CHECK-NEXT: [[Reg255]]:vgpr_32 = PHI [[Reg14]]:vgpr_32, %bb.2, undef [[Reg342:%[0-9]+]]:vgpr_32, %bb.20 +; CHECK-NEXT: [[Reg256]]:vgpr_32 = PHI [[Reg15]]:vgpr_32, %bb.2, undef [[Reg343:%[0-9]+]]:vgpr_32, %bb.20 +; CHECK-NEXT: [[Reg59]]:vgpr_32 = PHI [[Reg35]]:vgpr_32, %bb.2, undef [[Reg344:%[0-9]+]]:vgpr_32, %bb.20 +; CHECK-NEXT: [[Reg58]]:vgpr_32 = PHI [[Reg36]]:vgpr_32, %bb.2, undef [[Reg345:%[0-9]+]]:vgpr_32, %bb.20 +; CHECK-NEXT: [[Reg62]]:vgpr_32 = PHI [[Reg37]]:vgpr_32, %bb.2, undef [[Reg346:%[0-9]+]]:vgpr_32, %bb.20 +; CHECK-NEXT: [[Reg61]]:vgpr_32 = PHI [[Reg38]]:vgpr_32, %bb.2, undef [[Reg347:%[0-9]+]]:vgpr_32, %bb.20 +; CHECK-NEXT: [[Reg130]]:vreg_128 = PHI 
[[Reg39]]:vreg_128, %bb.2, undef [[Reg348:%[0-9]+]]:vreg_128, %bb.20 +; CHECK-NEXT: [[Reg129]]:vreg_128 = PHI [[Reg40]]:vreg_128, %bb.2, undef [[Reg349:%[0-9]+]]:vreg_128, %bb.20 +; CHECK-NEXT: [[Reg233]]:vreg_64 = PHI [[Reg22]]:vreg_64, %bb.2, undef [[Reg350:%[0-9]+]]:vreg_64, %bb.20 +; CHECK-NEXT: [[Reg218]]:sreg_32 = SI_ELSE killed [[Reg46]]:sreg_32, %bb.12, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.4 +; EMPTY: +; CHECK: bb.19.bb4: +; CHECK-NEXT: ; predecessors: %bb.3, %bb.19 +; CHECK-NEXT: successors: %bb.20(0x04000000), %bb.19(0x7c000000); %bb.20(3.12%), %bb.19(96.88%) +; EMPTY: +; CHECK: [[Reg351:%[0-9]+]]:sreg_32 = PHI [[Reg56]]:sreg_32, %bb.3, [[Reg352:%[0-9]+]]:sreg_32, %bb.19 +; CHECK-NEXT: [[Reg353:%[0-9]+]]:sreg_32 = PHI [[Reg55]]:sreg_32, %bb.3, [[Reg354:%[0-9]+]]:sreg_32, %bb.19 +; CHECK-NEXT: [[Reg354]]:sreg_32 = S_ADD_I32 [[Reg353]]:sreg_32, 1, implicit-def dead $scc +; CHECK-NEXT: [[Reg355:%[0-9]+]]:sreg_32 = S_ADD_I32 killed [[Reg353]]:sreg_32, 2, implicit-def dead $scc +; CHECK-NEXT: [[Reg356:%[0-9]+]]:sreg_32 = V_CMP_GE_U32_e64 killed [[Reg355]]:sreg_32, [[Reg20]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg352]]:sreg_32 = SI_IF_BREAK killed [[Reg356]]:sreg_32, killed [[Reg351]]:sreg_32, implicit-def dead $scc +; CHECK-NEXT: [[Reg357:%[0-9]+]]:vgpr_32 = COPY [[Reg354]]:sreg_32, implicit $exec +; CHECK-NEXT: SI_LOOP [[Reg352]]:sreg_32, %bb.19, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.20 +; EMPTY: +; CHECK: bb.20.bb14.loopexit: +; CHECK-NEXT: ; predecessors: %bb.19 +; CHECK-NEXT: successors: %bb.18(0x80000000); %bb.18(100.00%) +; EMPTY: +; CHECK: SI_END_CF killed [[Reg352]]:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: [[Reg358:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[Reg50]]:vgpr_32, [[Reg357]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg359:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[Reg54]]:vgpr_32, 
[[Reg357]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg360:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @array7, target-flags(amdgpu-gotprel32-hi) @array7, implicit-def dead $scc +; CHECK-NEXT: [[Reg361:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[Reg360]]:sreg_64, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) +; CHECK-NEXT: [[Reg362:%[0-9]+]]:vreg_64 = COPY killed [[Reg361]]:sreg_64_xexec +; CHECK-NEXT: FLAT_STORE_DWORD [[Reg362]]:vreg_64, killed [[Reg358]]:vgpr_32, 68, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr getelementptr inbounds nuw (i8, ptr @array7, i64 68)`) +; CHECK-NEXT: [[Reg363:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @array9, target-flags(amdgpu-gotprel32-hi) @array9, implicit-def dead $scc +; CHECK-NEXT: [[Reg364:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[Reg363]]:sreg_64, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) +; CHECK-NEXT: [[Reg365:%[0-9]+]]:vreg_64 = COPY killed [[Reg364]]:sreg_64_xexec +; CHECK-NEXT: FLAT_STORE_DWORD [[Reg365]]:vreg_64, killed [[Reg359]]:vgpr_32, 60, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr getelementptr inbounds nuw (i8, ptr @array9, i64 60)`) +; CHECK-NEXT: [[Reg366:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @array6, target-flags(amdgpu-gotprel32-hi) @array6, implicit-def dead $scc +; CHECK-NEXT: [[Reg367:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[Reg366]]:sreg_64, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) +; CHECK-NEXT: [[Reg368:%[0-9]+]]:vreg_64 = COPY killed [[Reg367]]:sreg_64_xexec +; CHECK-NEXT: [[Reg369:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD killed [[Reg368]]:vreg_64, 44, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array6, i64 44)`) +; CHECK-NEXT: [[Reg331]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg369]]:vgpr_32, 
[[Reg357]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg370:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD killed [[Reg362]]:vreg_64, 20, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array7, i64 20)`) +; CHECK-NEXT: [[Reg330]]:vgpr_32 = V_SUB_U32_e64 killed [[Reg370]]:vgpr_32, [[Reg357]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg371:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @array8, target-flags(amdgpu-gotprel32-hi) @array8, implicit-def dead $scc +; CHECK-NEXT: [[Reg372:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[Reg371]]:sreg_64, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) +; CHECK-NEXT: [[Reg373:%[0-9]+]]:vreg_64 = COPY killed [[Reg372]]:sreg_64_xexec +; CHECK-NEXT: [[Reg374:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD killed [[Reg373]]:vreg_64, 44, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array8, i64 44)`, align 8) +; CHECK-NEXT: [[Reg329]]:vgpr_32 = V_ADD_U32_e64 killed [[Reg374]]:vgpr_32, [[Reg357]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg375:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD killed [[Reg365]]:vreg_64, 24, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array9, i64 24)`) +; CHECK-NEXT: [[Reg328]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg375]]:vgpr_32, [[Reg357]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg376:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @array2, target-flags(amdgpu-gotprel32-hi) @array2, implicit-def dead $scc +; CHECK-NEXT: [[Reg377:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[Reg376]]:sreg_64, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) +; CHECK-NEXT: [[Reg378:%[0-9]+]]:vreg_64 = COPY killed [[Reg377]]:sreg_64_xexec +; CHECK-NEXT: [[Reg379:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD killed [[Reg378]]:vreg_64, 80, 0, implicit $exec, implicit $flat_scr :: (load (s32) 
from `ptr getelementptr inbounds nuw (i8, ptr @array2, i64 80)`) +; CHECK-NEXT: [[Reg327]]:vgpr_32 = V_ADD_U32_e64 killed [[Reg379]]:vgpr_32, [[Reg357]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg380:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @array3, target-flags(amdgpu-gotprel32-hi) @array3, implicit-def dead $scc +; CHECK-NEXT: [[Reg381:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[Reg380]]:sreg_64, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) +; CHECK-NEXT: [[Reg382:%[0-9]+]]:vreg_64 = COPY killed [[Reg381]]:sreg_64_xexec +; CHECK-NEXT: [[Reg383:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD killed [[Reg382]]:vreg_64, 80, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array3, i64 80)`) +; CHECK-NEXT: [[Reg326]]:vgpr_32 = V_SUB_U32_e64 killed [[Reg383]]:vgpr_32, [[Reg357]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg384:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @array4, target-flags(amdgpu-gotprel32-hi) @array4, implicit-def dead $scc +; CHECK-NEXT: [[Reg385:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[Reg384]]:sreg_64, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) +; CHECK-NEXT: [[Reg386:%[0-9]+]]:vreg_64 = COPY killed [[Reg385]]:sreg_64_xexec +; CHECK-NEXT: [[Reg387:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD killed [[Reg386]]:vreg_64, 80, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array4, i64 80)`, align 8) +; CHECK-NEXT: [[Reg325]]:vgpr_32 = V_ADD_U32_e64 killed [[Reg387]]:vgpr_32, [[Reg357]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg388:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @array5, target-flags(amdgpu-gotprel32-hi) @array5, implicit-def dead $scc +; CHECK-NEXT: [[Reg389:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[Reg388]]:sreg_64, 0, 0 :: (dereferenceable invariant load (s64) from 
got, addrspace 4) +; CHECK-NEXT: [[Reg390:%[0-9]+]]:vreg_64 = COPY killed [[Reg389]]:sreg_64_xexec +; CHECK-NEXT: [[Reg391:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD killed [[Reg390]]:vreg_64, 80, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array5, i64 80)`) +; CHECK-NEXT: [[Reg324]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg391]]:vgpr_32, killed [[Reg357]]:vgpr_32, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.18 +; EMPTY: +; CHECK: bb.21.bb14: +; CHECK-NEXT: ; predecessors: %bb.12 +; EMPTY: +; CHECK: [[Reg392:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 killed [[Reg215]]:vgpr_32, killed [[Reg45]]:vgpr_32, killed [[Reg213]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg393:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 killed [[Reg392]]:vgpr_32, killed [[Reg210]]:vgpr_32, killed [[Reg207]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg394:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 killed [[Reg393]]:vgpr_32, killed [[Reg204]]:vgpr_32, 100, implicit $exec +; CHECK-NEXT: [[Reg395:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[Reg394]]:vgpr_32, killed [[Reg201]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg396:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 killed [[Reg395]]:vgpr_32, killed [[Reg199]]:vgpr_32, killed [[Reg196]]:vgpr_32, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_DWORD killed [[Reg220]]:vreg_64, killed [[Reg396]]:vgpr_32, 4, 0, implicit $exec :: (store (s32) into %ir.gep3, addrspace 1) +; CHECK-NEXT: GLOBAL_STORE_DWORD killed [[Reg219]]:vreg_64, killed [[Reg394]]:vgpr_32, 4, 0, implicit $exec :: (store (s32) into %ir.gep4, addrspace 1) +; CHECK-NEXT: S_ENDPGM 0 +; EMPTY: +; CHECK: # End machine code for function test. 
+; EMPTY: +; CHECK: Next-use distance of Register [[Reg20]] = 64.0 +; CHECK-NEXT: Next-use distance of Register [[Reg19]] = 86.0 +; CHECK-NEXT: Next-use distance of Register [[Reg18]] = 85.0 +; CHECK-NEXT: Next-use distance of Register [[Reg17]] = 85.0 +; CHECK-NEXT: Next-use distance of Register [[Reg16]] = 84.0 +; CHECK-NEXT: Next-use distance of Register [[Reg15]] = 63.0 +; CHECK-NEXT: Next-use distance of Register [[Reg14]] = 61.0 +; CHECK-NEXT: Next-use distance of Register [[Reg13]] = 59.0 +; CHECK-NEXT: Next-use distance of Register [[Reg12]] = 57.0 +; CHECK-NEXT: Next-use distance of Register [[Reg11]] = 55.0 +; CHECK-NEXT: Next-use distance of Register [[Reg10]] = 53.0 +; CHECK-NEXT: Next-use distance of Register [[Reg9]] = 51.0 +; CHECK-NEXT: Next-use distance of Register [[Reg8]] = 49.0 +; CHECK-NEXT: Next-use distance of Register [[Reg7]] = 7.0 +; CHECK-NEXT: Next-use distance of Register [[Reg6]] = 6.0 +; CHECK-NEXT: Next-use distance of Register [[Reg5]] = 9.0 +; CHECK-NEXT: Next-use distance of Register [[Reg4]] = 6.0 +; CHECK-NEXT: Next-use distance of Register [[Reg3]] = 43.0 +; CHECK-NEXT: Next-use distance of Register [[Reg2]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg1]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg21]] = 18.0 +; CHECK-NEXT: Next-use distance of Register [[Reg22]] = 6.0 +; CHECK-NEXT: Next-use distance of Register [[Reg23]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg24]] = 20.0 +; CHECK-NEXT: Next-use distance of Register [[Reg25]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg26]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg27]] = 21.0 +; CHECK-NEXT: Next-use distance of Register [[Reg28]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg29]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg30]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg31]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg32]] = 1.0 +; CHECK-NEXT: Next-use distance 
of Register [[Reg33]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg34]] = 7.0 +; CHECK-NEXT: Next-use distance of Register [[Reg35]] = 35.0 +; CHECK-NEXT: Next-use distance of Register [[Reg36]] = 35.0 +; CHECK-NEXT: Next-use distance of Register [[Reg37]] = 35.0 +; CHECK-NEXT: Next-use distance of Register [[Reg38]] = 35.0 +; CHECK-NEXT: Next-use distance of Register [[Reg39]] = 35.0 +; CHECK-NEXT: Next-use distance of Register [[Reg40]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg41]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg42]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg43]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg44]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg45]] = 45.0 +; CHECK-NEXT: Next-use distance of Register [[Reg46]] = 29.0 +; CHECK-NEXT: Next-use distance of Register [[Reg47]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg48]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg49]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg50]] = 9009.0 +; CHECK-NEXT: Next-use distance of Register [[Reg51]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg52]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg53]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg54]] = 9006.0 +; CHECK-NEXT: Next-use distance of Register [[Reg55]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg56]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg57]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg60]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg63]] = 47.0 +; CHECK-NEXT: Next-use distance of Register [[Reg66]] = 12.0 +; CHECK-NEXT: Next-use distance of Register [[Reg69]] = 58.0 +; CHECK-NEXT: Next-use distance of Register [[Reg70]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg72]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg73]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg74]] = 3.0 +; 
CHECK-NEXT: Next-use distance of Register [[Reg75]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg76]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg77]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg78]] = 50.0 +; CHECK-NEXT: Next-use distance of Register [[Reg79]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg81]] = 47.0 +; CHECK-NEXT: Next-use distance of Register [[Reg82]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg83]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg84]] = 48.0 +; CHECK-NEXT: Next-use distance of Register [[Reg85]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg86]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg87]] = 45.0 +; CHECK-NEXT: Next-use distance of Register [[Reg88]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg89]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg90]] = 43.0 +; CHECK-NEXT: Next-use distance of Register [[Reg91]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg92]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg93]] = 40.0 +; CHECK-NEXT: Next-use distance of Register [[Reg94]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg95]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg96]] = 38.0 +; CHECK-NEXT: Next-use distance of Register [[Reg97]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg98]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg99]] = 35.0 +; CHECK-NEXT: Next-use distance of Register [[Reg100]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg101]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg102]] = 33.0 +; CHECK-NEXT: Next-use distance of Register [[Reg103]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg104]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg105]] = 30.0 +; CHECK-NEXT: Next-use distance of Register [[Reg106]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg107]] = 1.0 +; CHECK-NEXT: Next-use distance 
of Register [[Reg108]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg109]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg111]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg112]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg113]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg114]] = 51.0 +; CHECK-NEXT: Next-use distance of Register [[Reg115]] = 49.0 +; CHECK-NEXT: Next-use distance of Register [[Reg116]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg117]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg118]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg119]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg120]] = 10.0 +; CHECK-NEXT: Next-use distance of Register [[Reg121]] = 10.0 +; CHECK-NEXT: Next-use distance of Register [[Reg124]] = 21.0 +; CHECK-NEXT: Next-use distance of Register [[Reg126]] = 20.0 +; CHECK-NEXT: Next-use distance of Register [[Reg128]] = 24.0 +; CHECK-NEXT: Next-use distance of Register [[Reg131]] = 27.0 +; CHECK-NEXT: Next-use distance of Register [[Reg132]] = 25.0 +; CHECK-NEXT: Next-use distance of Register [[Reg133]] = 30.0 +; CHECK-NEXT: Next-use distance of Register [[Reg134]] = 28.0 +; CHECK-NEXT: Next-use distance of Register [[Reg135]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg136]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg137]] = 9.0 +; CHECK-NEXT: Next-use distance of Register [[Reg138]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg139]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg140]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg141]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg142]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg143]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg144]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg145]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg146]] = 5.0 +; CHECK-NEXT: Next-use distance of 
Register [[Reg149]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg151]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg153]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg154]] = 10.0 +; CHECK-NEXT: Next-use distance of Register [[Reg155]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg156]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg157]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg158]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg159]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg160]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg161]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg148]] = 8.0 +; CHECK-NEXT: Next-use distance of Register [[Reg162]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg163]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg164]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg165]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg166]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg167]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg168]] = 9.0 +; CHECK-NEXT: Next-use distance of Register [[Reg169]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg170]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg171]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg172]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg173]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg174]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg175]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg176]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg177]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg178]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg179]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg180]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg181]] = 1.0 +; CHECK-NEXT: Next-use distance of Register 
[[Reg182]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg183]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg184]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg185]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg186]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg187]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg123]] = 10.0 +; CHECK-NEXT: Next-use distance of Register [[Reg188]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg189]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg190]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg191]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg192]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg193]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg194]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg195]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg196]] = 16.0 +; CHECK-NEXT: Next-use distance of Register [[Reg199]] = 15.0 +; CHECK-NEXT: Next-use distance of Register [[Reg201]] = 13.0 +; CHECK-NEXT: Next-use distance of Register [[Reg204]] = 11.0 +; CHECK-NEXT: Next-use distance of Register [[Reg207]] = 9.0 +; CHECK-NEXT: Next-use distance of Register [[Reg210]] = 8.0 +; CHECK-NEXT: Next-use distance of Register [[Reg213]] = 6.0 +; CHECK-NEXT: Next-use distance of Register [[Reg215]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg219]] = 9.0 +; CHECK-NEXT: Next-use distance of Register [[Reg220]] = 7.0 +; CHECK-NEXT: Next-use distance of Register [[Reg221]] = 17.0 +; CHECK-NEXT: Next-use distance of Register [[Reg222]] = 15.0 +; CHECK-NEXT: Next-use distance of Register [[Reg223]] = 29.0 +; CHECK-NEXT: Next-use distance of Register [[Reg224]] = 29.0 +; CHECK-NEXT: Next-use distance of Register [[Reg225]] = 29.0 +; CHECK-NEXT: Next-use distance of Register [[Reg226]] = 29.0 +; CHECK-NEXT: Next-use distance of Register [[Reg227]] = 7.0 +; CHECK-NEXT: Next-use distance of Register 
[[Reg228]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg229]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg230]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg231]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg232]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg234]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg235]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg236]] = 10.0 +; CHECK-NEXT: Next-use distance of Register [[Reg237]] = 14.0 +; CHECK-NEXT: Next-use distance of Register [[Reg240]] = 10.0 +; CHECK-NEXT: Next-use distance of Register [[Reg242]] = 8.0 +; CHECK-NEXT: Next-use distance of Register [[Reg244]] = 7.0 +; CHECK-NEXT: Next-use distance of Register [[Reg246]] = 8.0 +; CHECK-NEXT: Next-use distance of Register [[Reg248]] = 9.0 +; CHECK-NEXT: Next-use distance of Register [[Reg251]] = 9.0 +; CHECK-NEXT: Next-use distance of Register [[Reg254]] = 29.0 +; CHECK-NEXT: Next-use distance of Register [[Reg257]] = 12.0 +; CHECK-NEXT: Next-use distance of Register [[Reg258]] = 7.0 +; CHECK-NEXT: Next-use distance of Register [[Reg259]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg260]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg261]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg262]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg263]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg264]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg265]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg266]] = 6.0 +; CHECK-NEXT: Next-use distance of Register [[Reg267]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg268]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg269]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg270]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg271]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg272]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg273]] 
= 6.0 +; CHECK-NEXT: Next-use distance of Register [[Reg274]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg275]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg276]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg277]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg278]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg279]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg280]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg281]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg282]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg283]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg284]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg285]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg286]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg241]] = 15.0 +; CHECK-NEXT: Next-use distance of Register [[Reg287]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg288]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg289]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg290]] = 11.0 +; CHECK-NEXT: Next-use distance of Register [[Reg291]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg292]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg293]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg294]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg295]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg296]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg297]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg239]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg298]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg299]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg301]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg243]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg302]] = 11.0 +; CHECK-NEXT: Next-use distance of Register [[Reg303]] = 16.0 +; 
CHECK-NEXT: Next-use distance of Register [[Reg304]] = 16.0 +; CHECK-NEXT: Next-use distance of Register [[Reg305]] = 16.0 +; CHECK-NEXT: Next-use distance of Register [[Reg306]] = 17.0 +; CHECK-NEXT: Next-use distance of Register [[Reg307]] = 15.0 +; CHECK-NEXT: Next-use distance of Register [[Reg308]] = 16.0 +; CHECK-NEXT: Next-use distance of Register [[Reg309]] = 16.0 +; CHECK-NEXT: Next-use distance of Register [[Reg310]] = 16.0 +; CHECK-NEXT: Next-use distance of Register [[Reg311]] = 16.0 +; CHECK-NEXT: Next-use distance of Register [[Reg217]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg209]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg313]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg198]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg212]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg314]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg315]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg316]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg206]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg317]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg319]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg320]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg321]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg322]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg203]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg197]] = 29.0 +; CHECK-NEXT: Next-use distance of Register [[Reg200]] = 29.0 +; CHECK-NEXT: Next-use distance of Register [[Reg202]] = 29.0 +; CHECK-NEXT: Next-use distance of Register [[Reg205]] = 29.0 +; CHECK-NEXT: Next-use distance of Register [[Reg208]] = 29.0 +; CHECK-NEXT: Next-use distance of Register [[Reg211]] = 29.0 +; CHECK-NEXT: Next-use distance of Register [[Reg214]] = 29.0 +; CHECK-NEXT: Next-use distance of Register [[Reg216]] = 29.0 +; CHECK-NEXT: Next-use distance of Register [[Reg80]] = 34.0 +; 
CHECK-NEXT: Next-use distance of Register [[Reg110]] = 62.0 +; CHECK-NEXT: Next-use distance of Register [[Reg312]] = 129.0 +; CHECK-NEXT: Next-use distance of Register [[Reg71]] = 23.0 +; CHECK-NEXT: Next-use distance of Register [[Reg67]] = 20.0 +; CHECK-NEXT: Next-use distance of Register [[Reg68]] = 19.0 +; CHECK-NEXT: Next-use distance of Register [[Reg249]] = 106.0 +; CHECK-NEXT: Next-use distance of Register [[Reg252]] = 106.0 +; CHECK-NEXT: Next-use distance of Register [[Reg64]] = 15.0 +; CHECK-NEXT: Next-use distance of Register [[Reg65]] = 14.0 +; CHECK-NEXT: Next-use distance of Register [[Reg255]] = 106.0 +; CHECK-NEXT: Next-use distance of Register [[Reg256]] = 105.0 +; CHECK-NEXT: Next-use distance of Register [[Reg59]] = 9.0 +; CHECK-NEXT: Next-use distance of Register [[Reg58]] = 8.0 +; CHECK-NEXT: Next-use distance of Register [[Reg62]] = 8.0 +; CHECK-NEXT: Next-use distance of Register [[Reg61]] = 7.0 +; CHECK-NEXT: Next-use distance of Register [[Reg130]] = 69.0 +; CHECK-NEXT: Next-use distance of Register [[Reg129]] = 68.0 +; CHECK-NEXT: Next-use distance of Register [[Reg233]] = 85.0 +; CHECK-NEXT: Next-use distance of Register [[Reg218]] = 10.0 +; CHECK-NEXT: Next-use distance of Register [[Reg351]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg353]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg354]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg355]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg356]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg352]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg357]] = 9002.0 +; CHECK-NEXT: Next-use distance of Register [[Reg358]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg359]] = 8.0 +; CHECK-NEXT: Next-use distance of Register [[Reg360]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg361]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg362]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg363]] = 1.0 +; 
CHECK-NEXT: Next-use distance of Register [[Reg364]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg365]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg366]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg367]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg368]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg369]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg331]] = 38.0 +; CHECK-NEXT: Next-use distance of Register [[Reg370]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg330]] = 35.0 +; CHECK-NEXT: Next-use distance of Register [[Reg371]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg372]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg373]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg374]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg329]] = 29.0 +; CHECK-NEXT: Next-use distance of Register [[Reg375]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg328]] = 26.0 +; CHECK-NEXT: Next-use distance of Register [[Reg376]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg377]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg378]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg379]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg327]] = 20.0 +; CHECK-NEXT: Next-use distance of Register [[Reg380]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg381]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg382]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg383]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg326]] = 14.0 +; CHECK-NEXT: Next-use distance of Register [[Reg384]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg385]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg386]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg387]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg325]] = 8.0 +; CHECK-NEXT: Next-use distance of Register [[Reg388]] = 1.0 +; 
CHECK-NEXT: Next-use distance of Register [[Reg389]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg390]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg391]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg324]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg392]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg393]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg394]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg395]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg396]] = 1.0 +entry: + %ld1 = load i32, ptr addrspace(1) %p1, align 1 + %gep1 = getelementptr inbounds i32, ptr addrspace(1) %p1, i64 3 + %ld2 = load i32, ptr addrspace(1) %gep1, align 1 + %load1 = load i32, ptr addrspace(1) %p3, align 4 + %tmp1 = add i32 %load1, %ld1 + %load2 = load <8 x i32>, ptr addrspace(1) %p3, align 1 + store i32 %tmp1, ptr addrspace(1) %p3 + %add1 = add i32 %ld1, %tmp1 + br i1 %cond1, label %bb1, label %bb2 + +bb1: + br label %bb2 + +bb2: + %phi0 = phi i32 [ 100, %bb1 ], [ %add1, %entry ] + %ld3 = load i32, ptr addrspace(3) %p2, align 1 + %add2 = add i32 %ld3, 100 + br i1 %cond2, label %bb3, label %bb4 + +bb3: + %mul1 = mul i32 %ld1, %phi0 + %add3 = add i32 %mul1, 1000 + br label %bb5 + +bb5: + %add30 = add i32 %add3, 900 + %gep2 = getelementptr inbounds i32, ptr addrspace(3) %p2, i64 3 + %ld4 = load i32, ptr addrspace(3) %gep2, align 8 + %add5 = add i32 %ld4, %add30 + %load3 = load <8 x i32>, ptr addrspace(1) %p4, align 1 + %load4 = load i32, ptr addrspace(1) %p4, align 2 + %tmp2 = add i32 %load4, %tmp1 + store i32 %tmp2, ptr addrspace(1) %p4 + %stack = alloca [5 x i32], align 4, addrspace(5) + %load6 = load i32, ptr addrspace(1) %p6, align 4 + %arrayidx11 = getelementptr inbounds [5 x i32], ptr addrspace(5) %stack, i32 0, i32 %load6 + store i32 %phi0, ptr addrspace(5) %arrayidx11, align 4 + %arrayidx22 = getelementptr inbounds i32, ptr addrspace(1) %p6, i32 1 + %load7 = load i32, ptr addrspace(1) 
%arrayidx22, align 4 + %arrayidx33 = getelementptr inbounds [5 x i32], ptr addrspace(5) %stack, i32 0, i32 %load7 + store i32 %ld3, ptr addrspace(5) %arrayidx33, align 2 + %xor = xor i1 %cond1, %cond2 + br i1 %xor, label %bb6, label %bb7 + +bb6: + %and = and i1 %cond1, %cond2 + %idx10 = getelementptr inbounds [5 x i32], [5 x i32]* @array2, i64 1, i64 0 + %val0 = load i32, i32* %idx10, align 4 + %idx20 = getelementptr inbounds [5 x i32], [5 x i32]* @array4, i64 0, i64 1 + store i32 %val0, i32 *%idx20 + br i1 %and, label %bb8, label %bb9 + +bb8: + %add6 = add i32 %ld2, %add5 + %idx12 = getelementptr inbounds [5 x i32], [5 x i32]* @array2, i64 1, i64 2 + %val2 = load i32, i32* %idx12, align 4 + %idx22 = getelementptr inbounds [5 x i32], [5 x i32]* @array3, i64 3, i64 2 + store i32 %val2, i32 *%idx22 + br label %bb10 + +bb9: + %mul2 = mul i32 %ld2, %add5 + %idx13 = getelementptr inbounds [5 x i32], [5 x i32]* @array5, i64 1, i64 0 + %val3 = load i32, i32* %idx13, align 4 + %idx23 = getelementptr inbounds [5 x i32], [5 x i32]* @array4, i64 3, i64 0 + store i32 %val3, i32 *%idx23 + br label %bb10 + +bb7: + %sub1 = sub i32 %ld4, %add5 + %mul3 = mul i32 %sub1, %ld3 + %div = udiv i32 %mul3, %ld1 + %add7 = add i32 %div, %ld2 + %idx14 = getelementptr inbounds [5 x i32], [5 x i32]* @array3, i64 4, i64 1 + %val4 = load i32, i32* %idx14, align 4 + %idx24 = getelementptr inbounds [5 x i32], [5 x i32]* @array4, i64 3, i64 0 + store i32 %val4, i32 *%idx24 + br label %bb10 + +bb10: + %phi2 = phi i32 [ %add6, %bb8 ], [ %mul2, %bb9], [ %add7, %bb7 ] + %add8 = add i32 %add2, %phi2 + %extract1 = extractelement < 8 x i32> %load2, i32 1 + %extract2 = extractelement < 8 x i32> %load2, i32 2 + %extract3 = extractelement < 8 x i32> %load2, i32 6 + %extract4 = extractelement < 8 x i32> %load2, i32 7 + %add101 = add i32 %extract1, %extract4 + %add102 = add i32 %add101, %extract3 + %idx1 = zext i32 %extract2 to i64 + %arrayidx1 = getelementptr inbounds i32, ptr addrspace(1) %p1, i64 %idx1 + 
store i32 %add102, ptr addrspace(1) %arrayidx1, align 4 + %cond3 = icmp ne i1 %cond1, %cond2 + br i1 %cond3, label %bb11, label %bb12 + +bb11: + %extract5 = extractelement < 8 x i32> %load3, i32 3 + %extract6 = extractelement < 8 x i32> %load3, i32 5 + %tmp3 = add i32 %extract5, %phi2 + %idx2 = zext i32 %extract6 to i64 + %arrayidx2 = getelementptr inbounds i32, ptr addrspace(1) %p5, i64 %idx2 + store i32 %tmp3, ptr addrspace(1) %arrayidx2, align 8 + br label %bb13 + +bb12: + %extract7 = extractelement < 8 x i32> %load3, i32 1 + %extract8 = extractelement < 8 x i32> %load3, i32 2 + %extract9 = extractelement < 8 x i32> %load2, i32 3 + %extract10 = extractelement < 8 x i32> %load2, i32 5 + %tmp4 = add i32 %extract9, %add8 + %idx3 = zext i32 %extract10 to i64 + %arrayidx3 = getelementptr inbounds i32, ptr addrspace(1) %p5, i64 %idx3 + store i32 %tmp4, ptr addrspace(1) %arrayidx3, align 2 + %tmp5 = add i32 %extract7, %tmp4 + %idx4 = zext i32 %extract8 to i64 + %arrayidx5 = getelementptr inbounds i32, ptr addrspace(1) %p6, i64 %idx4 + store i32 %tmp5, ptr addrspace(1) %arrayidx5, align 2 + %array1 = alloca [5 x i32], align 4, addrspace(5) + %load8 = load i32, ptr addrspace(1) %p6, align 4 + %arrayidx111 = getelementptr inbounds [5 x i32], ptr addrspace(5) %array1, i32 2, i32 %load8 + store i32 %tmp4, ptr addrspace(5) %arrayidx111, align 4 + %arrayidx222 = getelementptr inbounds i32, ptr addrspace(1) %p6, i32 1 + %load9 = load i32, ptr addrspace(1) %arrayidx222, align 4 + %arrayidx333 = getelementptr inbounds [5 x i32], ptr addrspace(5) %array1, i32 1, i32 %load9 + %load10 = load i32, ptr addrspace(5) %arrayidx333 + %arrayidx444 = getelementptr inbounds i32, ptr addrspace(1) %p4, i32 1 + store i32 %add30, ptr addrspace(1) %arrayidx444 + %idx15 = getelementptr inbounds [5 x i32], [5 x i32]* @array4, i64 0, i64 0 + %val5 = load i32, i32* %idx15, align 4 + %idx16 = getelementptr inbounds [5 x i32], [5 x i32]* @array2, i64 0, i64 0 + %val6 = load i32, i32* %idx16, align 4 + 
%idx17 = getelementptr inbounds [5 x i32], [5 x i32]* @array3, i64 0, i64 0 + %val7 = load i32, i32* %idx17, align 4 + %idx18 = getelementptr inbounds [5 x i32], [5 x i32]* @array5, i64 0, i64 0 + %val8 = load i32, i32* %idx18, align 4 + %mul10 = mul i32 %val5, %val6 + %mul11 = mul i32 %val7, %val8 + %add100 = add i32 %mul10, %mul11 + br label %bb13 + +bb13: + %phi3 = phi i32 [ %tmp3, %bb11 ], [ %add100, %bb12] + %phi4 = phi i32 [ %tmp3, %bb11 ], [ %val5, %bb12] + %phi5 = phi i32 [ %tmp3, %bb11 ], [ %load7, %bb12] + %phi6 = phi i32 [ %tmp3, %bb11 ], [ %load6, %bb12] + %phi7 = phi i32 [ %tmp3, %bb11 ], [ %val8, %bb12] + %phi8 = phi i32 [ %tmp3, %bb11 ], [ %extract1, %bb12] + %phi9 = phi i32 [ %tmp3, %bb11 ], [ %extract2, %bb12] + %phi10 = phi i32 [ %tmp3, %bb11 ], [ %extract3, %bb12] + %phi11 = phi i32 [ %tmp3, %bb11 ], [ %extract4, %bb12] + %phi12 = phi i32 [ %tmp3, %bb11 ], [ %load4, %bb12] + %add200 = add i32 %phi3, %phi2 + %add300 = sub i32 %phi0, %add1 + %add400 = add i32 %add200, %add300 + %add500 = mul i32 %add400, %add2 + %add600 = add i32 %add500, %add30 + %add700 = sub i32 %add600, %ld4 + %add800 = add i32 %add700, %phi4 + %add900 = mul i32 %add800, %phi5 + %add1000 = sub i32 %add900, %phi6 + %add1100 = mul i32 %add1000, %phi7 + %add1200 = add i32 %add1100, %phi8 + %add1300 = sub i32 %add1200, %phi9 + %add1400 = sub i32 %add1300, %phi10 + %add1500 = add i32 %add1400, %phi11 + %add1600 = mul i32 %add1500, %phi12 + store i32 %add1600, ptr addrspace(1) %p7, align 2 + br label %bb14 + +bb4: + %phi13 = phi i32 [ 0, %bb2 ], [ %ind, %bb4 ] + %idx600 = getelementptr inbounds [5 x i32], [5 x i32]* @array6, i64 1, i64 2 + %val600 = load i32, i32* %idx600, align 4 + %idx700 = getelementptr inbounds [5 x i32], [5 x i32]* @array7, i64 3, i64 2 + %addval600 = add i32 %val600, %phi13 + store i32 %addval600, i32 *%idx700 + %idx800 = getelementptr inbounds [5 x i32], [5 x i32]* @array8, i64 1, i64 0 + %val800 = load i32, i32* %idx800, align 4 + %idx900 = getelementptr 
inbounds [5 x i32], [5 x i32]* @array9, i64 3, i64 0 + %addval800 = add i32 %val800, %phi13 + store i32 %addval800, i32 *%idx900 + %idx601 = getelementptr inbounds [5 x i32], [5 x i32]* @array6, i64 2, i64 1 + %val601 = load i32, i32* %idx601, align 1 + %val611 = mul i32 %val601, %phi13 + %idx701 = getelementptr inbounds [5 x i32], [5 x i32]* @array7, i64 1, i64 0 + %val701 = load i32, i32* %idx701, align 2 + %val711 = sub i32 %val701, %phi13 + %idx801 = getelementptr inbounds [5 x i32], [5 x i32]* @array8, i64 2, i64 1 + %val801 = load i32, i32* %idx801, align 8 + %val811 = add i32 %val801, %phi13 + %idx901 = getelementptr inbounds [5 x i32], [5 x i32]* @array9, i64 1, i64 1 + %val901 = load i32, i32* %idx901, align 1 + %val911 = mul i32 %val901, %phi13 + %idx602 = getelementptr inbounds [5 x i32], [5 x i32]* @array2, i64 4, i64 0 + %val602 = load i32, i32* %idx602, align 1 + %val612 = add i32 %val602, %phi13 + %idx702 = getelementptr inbounds [5 x i32], [5 x i32]* @array3, i64 4, i64 0 + %val702 = load i32, i32* %idx702, align 2 + %val712 = sub i32 %val702, %phi13 + %idx802 = getelementptr inbounds [5 x i32], [5 x i32]* @array4, i64 4, i64 0 + %val802 = load i32, i32* %idx802, align 8 + %val812 = add i32 %val802, %phi13 + %idx902 = getelementptr inbounds [5 x i32], [5 x i32]* @array5, i64 4, i64 0 + %val902 = load i32, i32* %idx902, align 1 + %val912 = mul i32 %val902, %phi13 + %ind = add i32 %phi13, 1 + %loop.cond = icmp ult i32 %ind, %TC1 + br i1 %loop.cond, label %bb4, label %bb14 + +bb14: + %phi14 = phi i32 [ %add200, %bb13 ], [ %val611, %bb4 ] + %phi15 = phi i32 [ %add500, %bb13 ], [ %val711, %bb4 ] + %phi16 = phi i32 [ %add600, %bb13 ], [ %val811, %bb4 ] + %phi17 = phi i32 [ %add300, %bb13 ], [ %val911, %bb4 ] + %phi18 = phi i32 [ %add1000, %bb13 ], [ %val612, %bb4 ] + %phi19 = phi i32 [ %add1600, %bb13 ], [ %val712, %bb4 ] + %phi20 = phi i32 [ %add500, %bb13 ], [ %val812, %bb4 ] + %phi21 = phi i32 [ %add500, %bb13 ], [ %val912, %bb4 ] + %addall1 = add i32 
%phi14, %phi0 + %addall2 = add i32 %addall1, %phi15 + %addall3 = add i32 %addall2, 100 + %addall4 = add i32 %addall3, %phi16 + %addall5 = add i32 %addall4, %phi17 + %addall6 = add i32 %addall5, %phi18 + %addall7 = add i32 %addall6, %phi19 + %addall8 = add i32 %addall7, %phi20 + %addall9 = add i32 %addall8, %phi21 + %gep3 = getelementptr inbounds i32, ptr addrspace(1) %p8, i64 1 + store i32 %addall9, ptr addrspace(1) %gep3 + %gep4 = getelementptr inbounds i32, ptr addrspace(1) %p9, i64 1 + store i32 %addall6, ptr addrspace(1) %gep4 + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/test_ers_nested_loops.ll b/llvm/test/CodeGen/AMDGPU/test_ers_nested_loops.ll new file mode 100644 index 0000000000000..cdcf5cae6cfc1 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/test_ers_nested_loops.ll @@ -0,0 +1,289 @@ +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -enable-next-use-analysis=true -verify-machineinstrs -dump-next-use-distance < %s 2>&1 | FileCheck %s + +; +; bb.0.entry +; | +; bb.1.loop1.header<-------+ +; | | +; bb.2.loop2.header<---+ | +; | | | +; bb.3.loop3<--+ | | +; | | | | +; +--------+ | | +; | | | +; bb.4.loop2.latch-----+ | +; | | +; bb.5.loop1.latch---------+ +; | +; bb.6.exit +; +define amdgpu_ps i32 @test(ptr addrspace(1) %p1, ptr addrspace(1) %p2, ptr addrspace(1) %p3, ptr addrspace(1) %p4, ptr addrspace(1) %p5, i32 %TC1, i32 %TC2, i32 %TC3) { +; CHECK-LABEL: # Machine code for function test: IsSSA, TracksLiveness +; CHECK-NEXT: Function Live Ins: $vgpr0 in [[Reg1:%[0-9]+]], $vgpr1 in [[Reg2:%[0-9]+]], $vgpr2 in [[Reg3:%[0-9]+]], $vgpr3 in [[Reg4:%[0-9]+]], $vgpr4 in [[Reg5:%[0-9]+]], $vgpr5 in [[Reg6:%[0-9]+]], $vgpr6 in [[Reg7:%[0-9]+]], $vgpr7 in [[Reg8:%[0-9]+]], $vgpr8 in [[Reg9:%[0-9]+]], $vgpr9 in [[Reg10:%[0-9]+]], $vgpr10 in [[Reg11:%[0-9]+]], $vgpr11 in [[Reg12:%[0-9]+]] +; EMPTY: +; CHECK: bb.0.entry: +; CHECK-NEXT: successors: %bb.1(0x80000000); %bb.1(100.00%) +; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, 
$vgpr8, $vgpr9, $vgpr10, $vgpr11 +; CHECK-NEXT: [[Reg12]]:vgpr_32 = COPY killed $vgpr11 +; CHECK-NEXT: [[Reg11]]:vgpr_32 = COPY killed $vgpr10 +; CHECK-NEXT: [[Reg10]]:vgpr_32 = COPY killed $vgpr9 +; CHECK-NEXT: [[Reg9]]:vgpr_32 = COPY killed $vgpr8 +; CHECK-NEXT: [[Reg8]]:vgpr_32 = COPY killed $vgpr7 +; CHECK-NEXT: [[Reg7]]:vgpr_32 = COPY killed $vgpr6 +; CHECK-NEXT: [[Reg6]]:vgpr_32 = COPY killed $vgpr5 +; CHECK-NEXT: [[Reg5]]:vgpr_32 = COPY killed $vgpr4 +; CHECK-NEXT: [[Reg4]]:vgpr_32 = COPY killed $vgpr3 +; CHECK-NEXT: [[Reg3]]:vgpr_32 = COPY killed $vgpr2 +; CHECK-NEXT: [[Reg2]]:vgpr_32 = COPY killed $vgpr1 +; CHECK-NEXT: [[Reg1]]:vgpr_32 = COPY killed $vgpr0 +; CHECK-NEXT: [[Reg13:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg9]]:vgpr_32, %subreg.sub0, killed [[Reg10]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg14:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg7]]:vgpr_32, %subreg.sub0, killed [[Reg8]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg15:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg5]]:vgpr_32, %subreg.sub0, killed [[Reg6]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg16:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg3]]:vgpr_32, %subreg.sub0, killed [[Reg4]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg17:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg1]]:vgpr_32, %subreg.sub0, killed [[Reg2]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg18:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg17]]:vreg_64, 0, 0, implicit $exec :: (load (s8) from %ir.p1, addrspace 1) +; CHECK-NEXT: [[Reg19:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg17]]:vreg_64, 1, 0, implicit $exec :: (load (s8) from %ir.p1 + 1, addrspace 1) +; CHECK-NEXT: [[Reg20:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg19]]:vgpr_32, 8, killed [[Reg18]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg21:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg17]]:vreg_64, 2, 0, implicit $exec :: (load (s8) from %ir.p1 + 2, addrspace 1) +; CHECK-NEXT: [[Reg22:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE killed [[Reg17]]:vreg_64, 3, 0, 
implicit $exec :: (load (s8) from %ir.p1 + 3, addrspace 1) +; CHECK-NEXT: [[Reg23:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg22]]:vgpr_32, 8, killed [[Reg21]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg24:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg23]]:vgpr_32, 16, killed [[Reg20]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg25:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; EMPTY: +; CHECK: bb.1.loop1.header: +; CHECK-NEXT: ; predecessors: %bb.0, %bb.5 +; CHECK-NEXT: successors: %bb.2(0x80000000); %bb.2(100.00%) +; EMPTY: +; CHECK: [[Reg26:%[0-9]+]]:sreg_32 = PHI [[Reg25]]:sreg_32, %bb.0, [[Reg27:%[0-9]+]]:sreg_32, %bb.5 +; CHECK-NEXT: [[Reg28:%[0-9]+]]:sreg_32 = PHI [[Reg25]]:sreg_32, %bb.0, [[Reg29:%[0-9]+]]:sreg_32, %bb.5 +; CHECK-NEXT: [[Reg30:%[0-9]+]]:vgpr_32 = PHI [[Reg24]]:vgpr_32, %bb.0, [[Reg31:%[0-9]+]]:vgpr_32, %bb.5 +; CHECK-NEXT: [[Reg32:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 13, implicit $exec +; EMPTY: +; CHECK: bb.2.loop2.header: +; CHECK-NEXT: ; predecessors: %bb.1, %bb.4 +; CHECK-NEXT: successors: %bb.3(0x80000000); %bb.3(100.00%) +; EMPTY: +; CHECK: [[Reg33:%[0-9]+]]:sreg_32 = PHI [[Reg25]]:sreg_32, %bb.1, [[Reg34:%[0-9]+]]:sreg_32, %bb.4 +; CHECK-NEXT: [[Reg35:%[0-9]+]]:sreg_32 = PHI [[Reg25]]:sreg_32, %bb.1, [[Reg36:%[0-9]+]]:sreg_32, %bb.4 +; CHECK-NEXT: [[Reg37:%[0-9]+]]:vgpr_32 = PHI [[Reg32]]:vgpr_32, %bb.1, [[Reg38:%[0-9]+]]:vgpr_32, %bb.4 +; CHECK-NEXT: [[Reg39:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg16]]:vreg_64, 0, 0, implicit $exec :: (load (s8) from %ir.p2, addrspace 1) +; CHECK-NEXT: [[Reg40:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg16]]:vreg_64, 1, 0, implicit $exec :: (load (s8) from %ir.p2 + 1, addrspace 1) +; CHECK-NEXT: [[Reg41:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg40]]:vgpr_32, 8, killed [[Reg39]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg42:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg16]]:vreg_64, 2, 0, implicit $exec :: (load (s8) from %ir.p2 + 2, addrspace 1) +; CHECK-NEXT: [[Reg43:%[0-9]+]]:vgpr_32 = 
GLOBAL_LOAD_UBYTE [[Reg16]]:vreg_64, 3, 0, implicit $exec :: (load (s8) from %ir.p2 + 3, addrspace 1) +; CHECK-NEXT: [[Reg44:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg43]]:vgpr_32, 8, killed [[Reg42]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg45:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg44]]:vgpr_32, 16, killed [[Reg41]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg46:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[Reg35]]:sreg_32, [[Reg45]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_BYTE_D16_HI [[Reg14]]:vreg_64, [[Reg46]]:vgpr_32, 2, 0, implicit $exec :: (store (s8) into %ir.p4 + 2, addrspace 1) +; CHECK-NEXT: GLOBAL_STORE_BYTE [[Reg14]]:vreg_64, [[Reg46]]:vgpr_32, 0, 0, implicit $exec :: (store (s8) into %ir.p4, addrspace 1) +; CHECK-NEXT: [[Reg47:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 24, [[Reg46]]:vgpr_32, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_BYTE [[Reg14]]:vreg_64, killed [[Reg47]]:vgpr_32, 3, 0, implicit $exec :: (store (s8) into %ir.p4 + 3, addrspace 1) +; CHECK-NEXT: [[Reg48:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 8, killed [[Reg46]]:vgpr_32, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_BYTE [[Reg14]]:vreg_64, killed [[Reg48]]:vgpr_32, 1, 0, implicit $exec :: (store (s8) into %ir.p4 + 1, addrspace 1) +; CHECK-NEXT: [[Reg49:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; EMPTY: +; CHECK: bb.3.loop3: +; CHECK-NEXT: ; predecessors: %bb.2, %bb.3 +; CHECK-NEXT: successors: %bb.4(0x04000000), %bb.3(0x7c000000); %bb.4(3.12%), %bb.3(96.88%) +; EMPTY: +; CHECK: [[Reg50:%[0-9]+]]:sreg_32 = PHI [[Reg49]]:sreg_32, %bb.2, [[Reg51:%[0-9]+]]:sreg_32, %bb.3 +; CHECK-NEXT: [[Reg52:%[0-9]+]]:sreg_32 = PHI [[Reg49]]:sreg_32, %bb.2, [[Reg53:%[0-9]+]]:sreg_32, %bb.3 +; CHECK-NEXT: [[Reg53]]:sreg_32 = S_ADD_I32 killed [[Reg52]]:sreg_32, 3, implicit-def dead $scc +; CHECK-NEXT: [[Reg54:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg15]]:vreg_64, 0, 0, implicit $exec :: (load (s8) from %ir.p3, addrspace 1) +; CHECK-NEXT: [[Reg55:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE 
[[Reg15]]:vreg_64, 1, 0, implicit $exec :: (load (s8) from %ir.p3 + 1, addrspace 1) +; CHECK-NEXT: [[Reg56:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg55]]:vgpr_32, 8, killed [[Reg54]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg57:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg15]]:vreg_64, 2, 0, implicit $exec :: (load (s8) from %ir.p3 + 2, addrspace 1) +; CHECK-NEXT: [[Reg58:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg15]]:vreg_64, 3, 0, implicit $exec :: (load (s8) from %ir.p3 + 3, addrspace 1) +; CHECK-NEXT: [[Reg59:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg58]]:vgpr_32, 8, killed [[Reg57]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg60:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg59]]:vgpr_32, 16, killed [[Reg56]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg61:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[Reg53]]:sreg_32, [[Reg60]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_BYTE_D16_HI [[Reg13]]:vreg_64, [[Reg61]]:vgpr_32, 2, 0, implicit $exec :: (store (s8) into %ir.p5 + 2, addrspace 1) +; CHECK-NEXT: GLOBAL_STORE_BYTE [[Reg13]]:vreg_64, [[Reg61]]:vgpr_32, 0, 0, implicit $exec :: (store (s8) into %ir.p5, addrspace 1) +; CHECK-NEXT: [[Reg62:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 24, [[Reg61]]:vgpr_32, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_BYTE [[Reg13]]:vreg_64, killed [[Reg62]]:vgpr_32, 3, 0, implicit $exec :: (store (s8) into %ir.p5 + 3, addrspace 1) +; CHECK-NEXT: [[Reg63:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 8, killed [[Reg61]]:vgpr_32, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_BYTE [[Reg13]]:vreg_64, killed [[Reg63]]:vgpr_32, 1, 0, implicit $exec :: (store (s8) into %ir.p5 + 1, addrspace 1) +; CHECK-NEXT: [[Reg64:%[0-9]+]]:sreg_32 = V_CMP_GE_U32_e64 [[Reg53]]:sreg_32, [[Reg11]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg51]]:sreg_32 = SI_IF_BREAK killed [[Reg64]]:sreg_32, killed [[Reg50]]:sreg_32, implicit-def dead $scc +; CHECK-NEXT: [[Reg65:%[0-9]+]]:vgpr_32 = COPY [[Reg53]]:sreg_32, implicit $exec +; CHECK-NEXT: SI_LOOP 
[[Reg51]]:sreg_32, %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.4 +; EMPTY: +; CHECK: bb.4.loop2.latch: +; CHECK-NEXT: ; predecessors: %bb.3 +; CHECK-NEXT: successors: %bb.5(0x04000000), %bb.2(0x7c000000); %bb.5(3.12%), %bb.2(96.88%) +; EMPTY: +; CHECK: SI_END_CF killed [[Reg51]]:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: [[Reg66:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[Reg60]]:vgpr_32, [[Reg65]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg38]]:vgpr_32 = V_MUL_LO_U32_e64 [[Reg30]]:vgpr_32, [[Reg66]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg36]]:sreg_32 = S_ADD_I32 killed [[Reg35]]:sreg_32, 2, implicit-def dead $scc +; CHECK-NEXT: [[Reg67:%[0-9]+]]:sreg_32 = V_CMP_GE_U32_e64 [[Reg36]]:sreg_32, [[Reg12]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg34]]:sreg_32 = SI_IF_BREAK killed [[Reg67]]:sreg_32, killed [[Reg33]]:sreg_32, implicit-def dead $scc +; CHECK-NEXT: SI_LOOP [[Reg34]]:sreg_32, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.5 +; EMPTY: +; CHECK: bb.5.loop1.latch: +; CHECK-NEXT: ; predecessors: %bb.4 +; CHECK-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000); %bb.6(3.12%), %bb.1(96.88%) +; EMPTY: +; CHECK: SI_END_CF killed [[Reg34]]:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: [[Reg31]]:vgpr_32 = V_SUB_U32_e64 killed [[Reg45]]:vgpr_32, killed [[Reg65]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg29]]:sreg_32 = S_ADD_I32 killed [[Reg28]]:sreg_32, 1, implicit-def dead $scc +; CHECK-NEXT: [[Reg68:%[0-9]+]]:sreg_32 = V_CMP_GE_U32_e64 [[Reg29]]:sreg_32, [[Reg11]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg27]]:sreg_32 = SI_IF_BREAK killed [[Reg68]]:sreg_32, killed [[Reg26]]:sreg_32, implicit-def dead $scc +; CHECK-NEXT: SI_LOOP [[Reg27]]:sreg_32, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.6 +; EMPTY: 
+; CHECK: bb.6.exit: +; CHECK-NEXT: ; predecessors: %bb.5 +; EMPTY: +; CHECK: SI_END_CF killed [[Reg27]]:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: [[Reg69:%[0-9]+]]:vgpr_32 = V_MAX_U32_e64 1, killed [[Reg11]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg70:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[Reg69]]:vgpr_32, killed [[Reg24]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg71:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 [[Reg37]]:vgpr_32, killed [[Reg30]]:vgpr_32, [[Reg66]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg72:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[Reg66]]:vgpr_32, killed [[Reg38]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg73:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 killed [[Reg72]]:vgpr_32, killed [[Reg71]]:vgpr_32, killed [[Reg37]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg74:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 killed [[Reg70]]:vgpr_32, killed [[Reg73]]:vgpr_32, -1, implicit $exec +; CHECK-NEXT: [[Reg75:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 killed [[Reg74]]:vgpr_32, implicit $exec +; CHECK-NEXT: $sgpr0 = COPY killed [[Reg75]]:sreg_32_xm0 +; CHECK-NEXT: SI_RETURN_TO_EPILOG killed $sgpr0 +; EMPTY: +; CHECK: # End machine code for function test. 
+; EMPTY: +; CHECK: Next-use distance of Register [[Reg12]] = 22051.0 +; CHECK-NEXT: Next-use distance of Register [[Reg11]] = 63.0 +; CHECK-NEXT: Next-use distance of Register [[Reg10]] = 10.0 +; CHECK-NEXT: Next-use distance of Register [[Reg9]] = 9.0 +; CHECK-NEXT: Next-use distance of Register [[Reg8]] = 9.0 +; CHECK-NEXT: Next-use distance of Register [[Reg7]] = 8.0 +; CHECK-NEXT: Next-use distance of Register [[Reg6]] = 8.0 +; CHECK-NEXT: Next-use distance of Register [[Reg5]] = 7.0 +; CHECK-NEXT: Next-use distance of Register [[Reg4]] = 7.0 +; CHECK-NEXT: Next-use distance of Register [[Reg3]] = 6.0 +; CHECK-NEXT: Next-use distance of Register [[Reg2]] = 6.0 +; CHECK-NEXT: Next-use distance of Register [[Reg1]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg13]] = 46.0 +; CHECK-NEXT: Next-use distance of Register [[Reg14]] = 27.0 +; CHECK-NEXT: Next-use distance of Register [[Reg15]] = 36.0 +; CHECK-NEXT: Next-use distance of Register [[Reg16]] = 17.0 +; CHECK-NEXT: Next-use distance of Register [[Reg17]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg18]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg19]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg20]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg21]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg22]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg23]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg24]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg25]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg26]] = 22026008.0 +; CHECK-NEXT: Next-use distance of Register [[Reg28]] = 22026005.0 +; CHECK-NEXT: Next-use distance of Register [[Reg30]] = 22022.0 +; CHECK-NEXT: Next-use distance of Register [[Reg32]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg33]] = 22023.0 +; CHECK-NEXT: Next-use distance of Register [[Reg35]] = 9.0 +; CHECK-NEXT: Next-use distance of Register [[Reg37]] = 22026011004.0 +; 
CHECK-NEXT: Next-use distance of Register [[Reg39]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg40]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg41]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg42]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg43]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg44]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg45]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg46]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg47]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg48]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg49]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg50]] = 18.0 +; CHECK-NEXT: Next-use distance of Register [[Reg52]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg53]] = 8.0 +; CHECK-NEXT: Next-use distance of Register [[Reg54]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg55]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg56]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg57]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg58]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg59]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg60]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg61]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg62]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg63]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg64]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg51]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg65]] = 22002.0 +; CHECK-NEXT: Next-use distance of Register [[Reg66]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg38]] = 8.0 +; CHECK-NEXT: Next-use distance of Register [[Reg36]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg67]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg34]] = 1.0 +; CHECK-NEXT: Next-use distance of Register 
[[Reg31]] = 8.0 +; CHECK-NEXT: Next-use distance of Register [[Reg29]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg68]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg27]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg69]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg70]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg71]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg72]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg73]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg74]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg75]] = 1.0 +entry: +; entry +; | +; loop1.header<-------+ +; | | +; loop2.header<-----+ | +; | | | +; loop3<-------+ | | +; +----------+ | | +; | | | +; loop2.latch-------+ | +; | | +; loop1.latch---------+ +; | +; exit + %ld1 = load i32, ptr addrspace(1) %p1, align 1 + br label %loop1.header + +loop1.header: + %phi.inc1 = phi i32 [ 0, %entry ], [ %inc1, %loop1.latch ] + %phi1 = phi i32 [ %ld1, %entry ], [ %sub, %loop1.latch ] + %add1 = add i32 %ld1, %phi.inc1 + br label %loop2.header + +loop2.header: + %phi.inc2 = phi i32 [ 0, %loop1.header ], [ %inc2, %loop2.latch ] + %phi2 = phi i32 [ 13, %loop1.header ], [ %mul, %loop2.latch ] + %ld2 = load i32, ptr addrspace(1) %p2, align 1 + %add2 = add i32 %ld2, %phi.inc2 + store i32 %add2, ptr addrspace(1) %p4, align 1 + br label %loop3 + +loop3: + %phi.inc3 = phi i32 [ 0, %loop2.header ], [ %inc3, %loop3 ] + %inc3 = add i32 %phi.inc3, 3 + %sub = sub i32 %ld2, %inc3 + %ld3 = load i32, ptr addrspace(1) %p3, align 1 + %add3 = add i32 %ld3, %inc3 + store i32 %add3, ptr addrspace(1) %p5, align 1 + %cond3 = icmp ult i32 %inc3, %TC1 + br i1 %cond3, label %loop3, label %loop2.latch + +loop2.latch: + %mul = mul i32 %phi1, %add3 + %inc2 = add i32 %phi.inc2, 2 + %cond2 = icmp ult i32 %inc2, %TC2 + br i1 %cond2, label %loop2.header, label %loop1.latch + +loop1.latch: + %add4 = add i32 %phi2, %phi1 + %add5 = add i32 %add3, %add4 + 
%inc1 = add i32 %phi.inc1, 1 + %cond1 = icmp ult i32 %inc1, %TC1 + br i1 %cond1, label %loop1.header, label %exit + +exit: + %add6 = add i32 %add3, %mul + %add7 = add i32 %add6, %add5 + %add8 = add i32 %add7, %phi2 + %add9 = add i32 %add8, %add1 + ret i32 %add9 +} diff --git a/llvm/test/CodeGen/AMDGPU/test_ers_spill_in_common_dominator_and_optimize_restores.ll b/llvm/test/CodeGen/AMDGPU/test_ers_spill_in_common_dominator_and_optimize_restores.ll new file mode 100644 index 0000000000000..bf6efeb57249c --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/test_ers_spill_in_common_dominator_and_optimize_restores.ll @@ -0,0 +1,381 @@ +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -enable-next-use-analysis=true -verify-machineinstrs -dump-next-use-distance < %s 2>&1 | FileCheck %s + +@array1 = global [5 x i32] zeroinitializer, align 4 +@array2 = global [5 x i32] zeroinitializer, align 4 +@array3 = global [5 x i32] zeroinitializer, align 4 +@array4 = global [5 x i32] zeroinitializer, align 4 + +; bb.0.entry +; / | +; bb.1.bb1 | +; \ | +; bb.2.bb2 +; +define amdgpu_ps void @test(ptr addrspace(1) %p1, ptr addrspace(3) %p2, i1 %cond1, ptr addrspace(1) %p3, ptr addrspace(1) %p4, ptr addrspace(1) %p5, i32 %arg1, i32 %arg2) { +; CHECK-LABEL: # Machine code for function test: IsSSA, TracksLiveness +; CHECK-NEXT: Function Live Ins: $vgpr0 in [[Reg1:%[0-9]+]], $vgpr1 in [[Reg2:%[0-9]+]], $vgpr2 in [[Reg3:%[0-9]+]], $vgpr3 in [[Reg4:%[0-9]+]], $vgpr4 in [[Reg5:%[0-9]+]], $vgpr5 in [[Reg6:%[0-9]+]], $vgpr6 in [[Reg7:%[0-9]+]], $vgpr7 in [[Reg8:%[0-9]+]], $vgpr8 in [[Reg9:%[0-9]+]], $vgpr9 in [[Reg10:%[0-9]+]], $vgpr10 in [[Reg11:%[0-9]+]], $vgpr11 in [[Reg12:%[0-9]+]] +; EMPTY: +; CHECK: bb.0.entry: +; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000); %bb.1(50.00%), %bb.2(50.00%) +; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 +; CHECK-NEXT: [[Reg12]]:vgpr_32 = COPY killed $vgpr11 +; CHECK-NEXT: 
[[Reg11]]:vgpr_32 = COPY killed $vgpr10 +; CHECK-NEXT: [[Reg10]]:vgpr_32 = COPY killed $vgpr9 +; CHECK-NEXT: [[Reg9]]:vgpr_32 = COPY killed $vgpr8 +; CHECK-NEXT: [[Reg8]]:vgpr_32 = COPY killed $vgpr7 +; CHECK-NEXT: [[Reg7]]:vgpr_32 = COPY killed $vgpr6 +; CHECK-NEXT: [[Reg6]]:vgpr_32 = COPY killed $vgpr5 +; CHECK-NEXT: [[Reg5]]:vgpr_32 = COPY killed $vgpr4 +; CHECK-NEXT: [[Reg4]]:vgpr_32 = COPY killed $vgpr3 +; CHECK-NEXT: [[Reg3]]:vgpr_32 = COPY killed $vgpr2 +; CHECK-NEXT: [[Reg2]]:vgpr_32 = COPY killed $vgpr1 +; CHECK-NEXT: [[Reg1]]:vgpr_32 = COPY killed $vgpr0 +; CHECK-NEXT: [[Reg13:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg9]]:vgpr_32, %subreg.sub0, killed [[Reg10]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg14:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg7]]:vgpr_32, %subreg.sub0, killed [[Reg8]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg15:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg5]]:vgpr_32, %subreg.sub0, killed [[Reg6]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg16:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg1]]:vgpr_32, %subreg.sub0, killed [[Reg2]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg17:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 1, killed [[Reg4]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg18:%[0-9]+]]:sreg_32 = V_CMP_EQ_U32_e64 1, killed [[Reg17]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg19:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg16]]:vreg_64, 0, 0, implicit $exec :: (load (s8) from %ir.p1, addrspace 1) +; CHECK-NEXT: [[Reg20:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg16]]:vreg_64, 1, 0, implicit $exec :: (load (s8) from %ir.p1 + 1, addrspace 1) +; CHECK-NEXT: [[Reg21:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg20]]:vgpr_32, 8, killed [[Reg19]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg22:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg16]]:vreg_64, 2, 0, implicit $exec :: (load (s8) from %ir.p1 + 2, addrspace 1) +; CHECK-NEXT: [[Reg23:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg16]]:vreg_64, 3, 0, implicit $exec :: (load (s8) from %ir.p1 + 
3, addrspace 1) +; CHECK-NEXT: [[Reg24:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg23]]:vgpr_32, 8, killed [[Reg22]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg25:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg24]]:vgpr_32, 16, killed [[Reg21]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg26:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg16]]:vreg_64, 12, 0, implicit $exec :: (load (s8) from %ir.gep1, addrspace 1) +; CHECK-NEXT: [[Reg27:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg16]]:vreg_64, 13, 0, implicit $exec :: (load (s8) from %ir.gep1 + 1, addrspace 1) +; CHECK-NEXT: [[Reg28:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg27]]:vgpr_32, 8, killed [[Reg26]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg29:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg16]]:vreg_64, 14, 0, implicit $exec :: (load (s8) from %ir.gep1 + 2, addrspace 1) +; CHECK-NEXT: [[Reg30:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE killed [[Reg16]]:vreg_64, 15, 0, implicit $exec :: (load (s8) from %ir.gep1 + 3, addrspace 1) +; CHECK-NEXT: [[Reg31:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg30]]:vgpr_32, 8, killed [[Reg29]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg32:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg31]]:vgpr_32, 16, killed [[Reg28]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg33:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[Reg15]]:vreg_64, 16, 0, implicit $exec :: (load (s128) from %ir.p3 + 16, align 4, addrspace 1) +; CHECK-NEXT: [[Reg34:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[Reg15]]:vreg_64, 0, 0, implicit $exec :: (load (s128) from %ir.p3, align 4, addrspace 1) +; CHECK-NEXT: [[Reg35:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[Reg34]].sub0:vreg_128, [[Reg25]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_DWORD killed [[Reg15]]:vreg_64, [[Reg35]]:vgpr_32, 0, 0, implicit $exec :: (store (s32) into %ir.p3, addrspace 1) +; CHECK-NEXT: [[Reg36:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[Reg35]]:vgpr_32, [[Reg25]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg37:%[0-9]+]]:sreg_64 
= SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @array1, target-flags(amdgpu-gotprel32-hi) @array1, implicit-def dead $scc +; CHECK-NEXT: [[Reg38:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[Reg37]]:sreg_64, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) +; CHECK-NEXT: [[Reg39:%[0-9]+]]:vreg_64 = COPY [[Reg38]]:sreg_64_xexec +; CHECK-NEXT: [[Reg40:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD killed [[Reg39]]:vreg_64, 20, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array1, i64 20)`) +; CHECK-NEXT: [[Reg41:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @array3, target-flags(amdgpu-gotprel32-hi) @array3, implicit-def dead $scc +; CHECK-NEXT: [[Reg42:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[Reg41]]:sreg_64, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) +; CHECK-NEXT: [[Reg43:%[0-9]+]]:vreg_64 = COPY [[Reg42]]:sreg_64_xexec +; CHECK-NEXT: FLAT_STORE_DWORD killed [[Reg43]]:vreg_64, [[Reg40]]:vgpr_32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr getelementptr inbounds nuw (i8, ptr @array3, i64 4)`) +; CHECK-NEXT: [[Reg44:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg14]]:vreg_64, 20, 0, implicit $exec :: (load (s8) from %ir.p4 + 20, addrspace 1) +; CHECK-NEXT: [[Reg45:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg14]]:vreg_64, 21, 0, implicit $exec :: (load (s8) from %ir.p4 + 21, addrspace 1) +; CHECK-NEXT: [[Reg46:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg45]]:vgpr_32, 8, killed [[Reg44]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg47:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg14]]:vreg_64, 22, 0, implicit $exec :: (load (s8) from %ir.p4 + 22, addrspace 1) +; CHECK-NEXT: [[Reg48:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg14]]:vreg_64, 23, 0, implicit $exec :: (load (s8) from %ir.p4 + 23, addrspace 1) +; CHECK-NEXT: [[Reg49:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg48]]:vgpr_32, 8, killed 
[[Reg47]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg50:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg49]]:vgpr_32, 16, killed [[Reg46]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg51:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg14]]:vreg_64, 12, 0, implicit $exec :: (load (s8) from %ir.p4 + 12, addrspace 1) +; CHECK-NEXT: [[Reg52:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg14]]:vreg_64, 13, 0, implicit $exec :: (load (s8) from %ir.p4 + 13, addrspace 1) +; CHECK-NEXT: [[Reg53:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg52]]:vgpr_32, 8, killed [[Reg51]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg54:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg14]]:vreg_64, 14, 0, implicit $exec :: (load (s8) from %ir.p4 + 14, addrspace 1) +; CHECK-NEXT: [[Reg55:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg14]]:vreg_64, 15, 0, implicit $exec :: (load (s8) from %ir.p4 + 15, addrspace 1) +; CHECK-NEXT: [[Reg56:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg55]]:vgpr_32, 8, killed [[Reg54]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg57:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg56]]:vgpr_32, 16, killed [[Reg53]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg58:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[Reg14]]:vreg_64, 0, 0, implicit $exec :: (load (s16) from %ir.p4, addrspace 1) +; CHECK-NEXT: [[Reg59:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_USHORT [[Reg14]]:vreg_64, 2, 0, implicit $exec :: (load (s16) from %ir.p4 + 2, addrspace 1) +; CHECK-NEXT: [[Reg60:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg59]]:vgpr_32, 16, killed [[Reg58]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg61:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[Reg60]]:vgpr_32, [[Reg35]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_DWORD killed [[Reg14]]:vreg_64, killed [[Reg61]]:vgpr_32, 0, 0, implicit $exec :: (store (s32) into %ir.p4, addrspace 1) +; CHECK-NEXT: [[Reg62:%[0-9]+]]:sreg_32 = SI_IF killed [[Reg18]]:sreg_32, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: 
S_BRANCH %bb.1 +; EMPTY: +; CHECK: bb.1.bb1: +; CHECK-NEXT: ; predecessors: %bb.0 +; CHECK-NEXT: successors: %bb.2(0x80000000); %bb.2(100.00%) +; EMPTY: +; CHECK: [[Reg63:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[Reg3]]:vgpr_32, 0, 0, implicit $exec :: (load (s8) from %ir.p2, addrspace 3) +; CHECK-NEXT: [[Reg64:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[Reg3]]:vgpr_32, 1, 0, implicit $exec :: (load (s8) from %ir.p2 + 1, addrspace 3) +; CHECK-NEXT: [[Reg65:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 [[Reg3]]:vgpr_32, 2, 0, implicit $exec :: (load (s8) from %ir.p2 + 2, addrspace 3) +; CHECK-NEXT: [[Reg66:%[0-9]+]]:vgpr_32 = DS_READ_U8_gfx9 killed [[Reg3]]:vgpr_32, 3, 0, implicit $exec :: (load (s8) from %ir.p2 + 3, addrspace 3) +; CHECK-NEXT: [[Reg67:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg64]]:vgpr_32, 8, killed [[Reg63]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg68:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg66]]:vgpr_32, 8, killed [[Reg65]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg69:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg68]]:vgpr_32, 16, killed [[Reg67]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg70:%[0-9]+]]:vreg_64 = COPY killed [[Reg38]]:sreg_64_xexec +; CHECK-NEXT: [[Reg71:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[Reg70]]:vreg_64, 28, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array1, i64 28)`) +; CHECK-NEXT: [[Reg72:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @array2, target-flags(amdgpu-gotprel32-hi) @array2, implicit-def dead $scc +; CHECK-NEXT: [[Reg73:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[Reg72]]:sreg_64, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) +; CHECK-NEXT: [[Reg74:%[0-9]+]]:vreg_64 = COPY killed [[Reg73]]:sreg_64_xexec +; CHECK-NEXT: FLAT_STORE_DWORD [[Reg74]]:vreg_64, [[Reg71]]:vgpr_32, 68, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr getelementptr inbounds nuw (i8, ptr @array2, i64 68)`) 
+; CHECK-NEXT: [[Reg75:%[0-9]+]]:sreg_64 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @array4, target-flags(amdgpu-gotprel32-hi) @array4, implicit-def dead $scc +; CHECK-NEXT: [[Reg76:%[0-9]+]]:sreg_64_xexec = S_LOAD_DWORDX2_IMM killed [[Reg75]]:sreg_64, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4) +; CHECK-NEXT: [[Reg77:%[0-9]+]]:vreg_64 = COPY killed [[Reg76]]:sreg_64_xexec +; CHECK-NEXT: [[Reg78:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[Reg77]]:vreg_64, 20, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array4, i64 20)`) +; CHECK-NEXT: [[Reg79:%[0-9]+]]:vreg_64 = COPY killed [[Reg42]]:sreg_64_xexec +; CHECK-NEXT: FLAT_STORE_DWORD [[Reg79]]:vreg_64, [[Reg78]]:vgpr_32, 60, 0, implicit $exec, implicit $flat_scr :: (store (s32) into `ptr getelementptr inbounds nuw (i8, ptr @array3, i64 60)`) +; CHECK-NEXT: [[Reg80:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD [[Reg79]]:vreg_64, 84, 0, implicit $exec, implicit $flat_scr :: (load (s32) from `ptr getelementptr inbounds nuw (i8, ptr @array3, i64 84)`) +; CHECK-NEXT: [[Reg81:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD killed [[Reg79]]:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (dereferenceable load (s32) from @array3) +; CHECK-NEXT: [[Reg82:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD killed [[Reg70]]:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (dereferenceable load (s32) from @array1) +; CHECK-NEXT: [[Reg83:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD killed [[Reg74]]:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (dereferenceable load (s32) from @array2) +; CHECK-NEXT: [[Reg84:%[0-9]+]]:vgpr_32 = FLAT_LOAD_DWORD killed [[Reg77]]:vreg_64, 0, 0, implicit $exec, implicit $flat_scr :: (dereferenceable load (s32) from @array4) +; CHECK-NEXT: [[Reg85:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 [[Reg83]]:vgpr_32, [[Reg84]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg86:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg85]]:vgpr_32, %subreg.sub0, undef 
[[Reg87:%[0-9]+]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg88:%[0-9]+]]:vreg_64, $sgpr_null = V_MAD_U64_U32_e64 [[Reg81]]:vgpr_32, [[Reg82]]:vgpr_32, killed [[Reg86]]:vreg_64, 0, implicit $exec +; CHECK-NEXT: [[Reg89:%[0-9]+]]:vgpr_32 = COPY killed [[Reg88]].sub0:vreg_64 +; CHECK-NEXT: [[Reg90:%[0-9]+]]:vgpr_32 = COPY [[Reg34]].sub1:vreg_128 +; CHECK-NEXT: [[Reg91:%[0-9]+]]:vgpr_32 = COPY killed [[Reg34]].sub2:vreg_128 +; CHECK-NEXT: [[Reg92:%[0-9]+]]:vgpr_32 = COPY [[Reg33]].sub2:vreg_128 +; CHECK-NEXT: [[Reg93:%[0-9]+]]:vgpr_32 = COPY killed [[Reg33]].sub3:vreg_128 +; EMPTY: +; CHECK: bb.2.bb2: +; CHECK-NEXT: ; predecessors: %bb.0, %bb.1 +; EMPTY: +; CHECK: [[Reg94:%[0-9]+]]:vgpr_32 = PHI [[Reg25]]:vgpr_32, %bb.0, [[Reg71]]:vgpr_32, %bb.1 +; CHECK-NEXT: [[Reg95:%[0-9]+]]:vgpr_32 = PHI [[Reg40]]:vgpr_32, %bb.0, [[Reg78]]:vgpr_32, %bb.1 +; CHECK-NEXT: [[Reg96:%[0-9]+]]:vgpr_32 = PHI [[Reg40]]:vgpr_32, %bb.0, [[Reg80]]:vgpr_32, %bb.1 +; CHECK-NEXT: [[Reg97:%[0-9]+]]:vgpr_32 = PHI [[Reg40]]:vgpr_32, %bb.0, [[Reg81]]:vgpr_32, %bb.1 +; CHECK-NEXT: [[Reg98:%[0-9]+]]:vgpr_32 = PHI [[Reg40]]:vgpr_32, %bb.0, [[Reg82]]:vgpr_32, %bb.1 +; CHECK-NEXT: [[Reg99:%[0-9]+]]:vgpr_32 = PHI [[Reg40]]:vgpr_32, %bb.0, [[Reg83]]:vgpr_32, %bb.1 +; CHECK-NEXT: [[Reg100:%[0-9]+]]:vgpr_32 = PHI [[Reg40]]:vgpr_32, %bb.0, [[Reg84]]:vgpr_32, %bb.1 +; CHECK-NEXT: [[Reg101:%[0-9]+]]:vgpr_32 = PHI [[Reg40]]:vgpr_32, %bb.0, [[Reg89]]:vgpr_32, %bb.1 +; CHECK-NEXT: [[Reg102:%[0-9]+]]:vgpr_32 = PHI [[Reg40]]:vgpr_32, %bb.0, [[Reg90]]:vgpr_32, %bb.1 +; CHECK-NEXT: [[Reg103:%[0-9]+]]:vgpr_32 = PHI [[Reg40]]:vgpr_32, %bb.0, [[Reg91]]:vgpr_32, %bb.1 +; CHECK-NEXT: [[Reg104:%[0-9]+]]:vgpr_32 = PHI [[Reg40]]:vgpr_32, %bb.0, [[Reg92]]:vgpr_32, %bb.1 +; CHECK-NEXT: [[Reg105:%[0-9]+]]:vgpr_32 = PHI [[Reg40]]:vgpr_32, %bb.0, [[Reg93]]:vgpr_32, %bb.1 +; CHECK-NEXT: [[Reg106:%[0-9]+]]:vgpr_32 = PHI [[Reg32]]:vgpr_32, %bb.0, [[Reg25]]:vgpr_32, %bb.1 +; CHECK-NEXT: [[Reg107:%[0-9]+]]:vgpr_32 = PHI 
[[Reg40]]:vgpr_32, %bb.0, [[Reg11]]:vgpr_32, %bb.1 +; CHECK-NEXT: [[Reg108:%[0-9]+]]:vgpr_32 = PHI [[Reg36]]:vgpr_32, %bb.0, [[Reg69]]:vgpr_32, %bb.1 +; CHECK-NEXT: SI_END_CF killed [[Reg62]]:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: [[Reg109:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[Reg94]]:vgpr_32, killed [[Reg57]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg110:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg109]]:vgpr_32, killed [[Reg50]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg111:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 killed [[Reg110]]:vgpr_32, killed [[Reg40]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg112:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 killed [[Reg111]]:vgpr_32, killed [[Reg95]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg113:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[Reg112]]:vgpr_32, killed [[Reg96]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg114:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 killed [[Reg113]]:vgpr_32, killed [[Reg35]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg115:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg114]]:vgpr_32, killed [[Reg97]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg116:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg115]]:vgpr_32, killed [[Reg98]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg117:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 killed [[Reg116]]:vgpr_32, killed [[Reg99]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg118:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 killed [[Reg117]]:vgpr_32, killed [[Reg100]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg119:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg118]]:vgpr_32, killed [[Reg101]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg120:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 killed [[Reg119]]:vgpr_32, killed [[Reg102]]:vgpr_32, killed [[Reg103]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg121:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 killed [[Reg120]]:vgpr_32, killed [[Reg104]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: 
[[Reg122:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 killed [[Reg121]]:vgpr_32, killed [[Reg105]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg123:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg122]]:vgpr_32, killed [[Reg106]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg124:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg108]]:vgpr_32, %subreg.sub0, undef [[Reg125:%[0-9]+]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg126:%[0-9]+]]:vreg_64, $sgpr_null = V_MAD_U64_U32_e64 killed [[Reg123]]:vgpr_32, killed [[Reg107]]:vgpr_32, killed [[Reg124]]:vreg_64, 0, implicit $exec +; CHECK-NEXT: [[Reg127:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 killed [[Reg126]].sub0:vreg_64, killed [[Reg12]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_SHORT_D16_HI [[Reg13]]:vreg_64, [[Reg127]]:vgpr_32, 2, 0, implicit $exec :: (store (s16) into %ir.p5 + 2, addrspace 1) +; CHECK-NEXT: GLOBAL_STORE_SHORT killed [[Reg13]]:vreg_64, killed [[Reg127]]:vgpr_32, 0, 0, implicit $exec :: (store (s16) into %ir.p5, addrspace 1) +; CHECK-NEXT: S_ENDPGM 0 +; EMPTY: +; CHECK: # End machine code for function test. 
+; EMPTY: +; CHECK: Next-use distance of Register [[Reg12]] = 99.0 +; CHECK-NEXT: Next-use distance of Register [[Reg11]] = 78.0 +; CHECK-NEXT: Next-use distance of Register [[Reg10]] = 10.0 +; CHECK-NEXT: Next-use distance of Register [[Reg9]] = 9.0 +; CHECK-NEXT: Next-use distance of Register [[Reg8]] = 9.0 +; CHECK-NEXT: Next-use distance of Register [[Reg7]] = 8.0 +; CHECK-NEXT: Next-use distance of Register [[Reg6]] = 8.0 +; CHECK-NEXT: Next-use distance of Register [[Reg5]] = 7.0 +; CHECK-NEXT: Next-use distance of Register [[Reg4]] = 8.0 +; CHECK-NEXT: Next-use distance of Register [[Reg3]] = 57.0 +; CHECK-NEXT: Next-use distance of Register [[Reg2]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg1]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg13]] = 88.0 +; CHECK-NEXT: Next-use distance of Register [[Reg14]] = 32.0 +; CHECK-NEXT: Next-use distance of Register [[Reg15]] = 18.0 +; CHECK-NEXT: Next-use distance of Register [[Reg16]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg17]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg18]] = 47.0 +; CHECK-NEXT: Next-use distance of Register [[Reg19]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg20]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg21]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg22]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg23]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg24]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg25]] = 10.0 +; CHECK-NEXT: Next-use distance of Register [[Reg26]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg27]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg28]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg29]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg30]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg31]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg32]] = 47.0 +; CHECK-NEXT: Next-use distance of 
Register [[Reg33]] = 64.0 +; CHECK-NEXT: Next-use distance of Register [[Reg34]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg35]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg36]] = 44.0 +; CHECK-NEXT: Next-use distance of Register [[Reg37]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg38]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg39]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg40]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg41]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg42]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg43]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg44]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg45]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg46]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg47]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg48]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg49]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg50]] = 32.0 +; CHECK-NEXT: Next-use distance of Register [[Reg51]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg52]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg53]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg54]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg55]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg56]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg57]] = 24.0 +; CHECK-NEXT: Next-use distance of Register [[Reg58]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg59]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg60]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg61]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg62]] = 17.0 +; CHECK-NEXT: Next-use distance of Register [[Reg63]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg64]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg65]] = 3.0 +; CHECK-NEXT: 
Next-use distance of Register [[Reg66]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg67]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg68]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg69]] = 40.0 +; CHECK-NEXT: Next-use distance of Register [[Reg70]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg71]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg72]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg73]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg74]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg75]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg76]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg77]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg78]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg79]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg80]] = 15.0 +; CHECK-NEXT: Next-use distance of Register [[Reg81]] = 6.0 +; CHECK-NEXT: Next-use distance of Register [[Reg82]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg83]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg84]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg85]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg86]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg88]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg89]] = 12.0 +; CHECK-NEXT: Next-use distance of Register [[Reg90]] = 12.0 +; CHECK-NEXT: Next-use distance of Register [[Reg91]] = 12.0 +; CHECK-NEXT: Next-use distance of Register [[Reg92]] = 12.0 +; CHECK-NEXT: Next-use distance of Register [[Reg93]] = 12.0 +; CHECK-NEXT: Next-use distance of Register [[Reg94]] = 16.0 +; CHECK-NEXT: Next-use distance of Register [[Reg95]] = 18.0 +; CHECK-NEXT: Next-use distance of Register [[Reg96]] = 18.0 +; CHECK-NEXT: Next-use distance of Register [[Reg97]] = 19.0 +; CHECK-NEXT: Next-use distance of Register [[Reg98]] = 19.0 +; CHECK-NEXT: Next-use distance of Register 
[[Reg99]] = 19.0 +; CHECK-NEXT: Next-use distance of Register [[Reg100]] = 19.0 +; CHECK-NEXT: Next-use distance of Register [[Reg101]] = 19.0 +; CHECK-NEXT: Next-use distance of Register [[Reg102]] = 19.0 +; CHECK-NEXT: Next-use distance of Register [[Reg103]] = 18.0 +; CHECK-NEXT: Next-use distance of Register [[Reg104]] = 18.0 +; CHECK-NEXT: Next-use distance of Register [[Reg105]] = 18.0 +; CHECK-NEXT: Next-use distance of Register [[Reg106]] = 18.0 +; CHECK-NEXT: Next-use distance of Register [[Reg107]] = 19.0 +; CHECK-NEXT: Next-use distance of Register [[Reg108]] = 17.0 +; CHECK-NEXT: Next-use distance of Register [[Reg109]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg110]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg111]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg112]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg113]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg114]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg115]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg116]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg117]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg118]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg119]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg120]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg121]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg122]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg123]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg124]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg126]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg127]] = 1.0 +entry: + %ld1 = load i32, ptr addrspace(1) %p1, align 1 + %gep1 = getelementptr inbounds i32, ptr addrspace(1) %p1, i64 3 + %ld2 = load i32, ptr addrspace(1) %gep1, align 1 + %load1 = load i32, ptr addrspace(1) %p3, align 4 + %tmp1 = add i32 %load1, %ld1 + %load2 = load <8 x i32>, ptr addrspace(1) 
%p3, align 1 + store i32 %tmp1, ptr addrspace(1) %p3 + %add1 = add i32 %ld1, %tmp1 + %idx10 = getelementptr inbounds [5 x i32], [5 x i32]* @array1, i64 1, i64 0 + %val0 = load i32, i32* %idx10, align 4 + %idx20 = getelementptr inbounds [5 x i32], [5 x i32]* @array3, i64 0, i64 1 + store i32 %val0, i32 *%idx20 + %load3 = load <8 x i32>, ptr addrspace(1) %p4, align 1 + %load4 = load i32, ptr addrspace(1) %p4, align 2 + %tmp2 = add i32 %load4, %tmp1 + store i32 %tmp2, ptr addrspace(1) %p4 + br i1 %cond1, label %bb1, label %bb2 + +bb1: + %ld3 = load i32, ptr addrspace(3) %p2, align 1 + %idx12 = getelementptr inbounds [5 x i32], [5 x i32]* @array1, i64 1, i64 2 + %val2 = load i32, i32* %idx12, align 4 + %idx22 = getelementptr inbounds [5 x i32], [5 x i32]* @array2, i64 3, i64 2 + store i32 %val2, i32 *%idx22 + %idx13 = getelementptr inbounds [5 x i32], [5 x i32]* @array4, i64 1, i64 0 + %val3 = load i32, i32* %idx13, align 4 + %idx23 = getelementptr inbounds [5 x i32], [5 x i32]* @array3, i64 3, i64 0 + store i32 %val3, i32 *%idx23 + %idx14 = getelementptr inbounds [5 x i32], [5 x i32]* @array3, i64 4, i64 1 + %val4 = load i32, i32* %idx14, align 4 + %idx24 = getelementptr inbounds [5 x i32], [5 x i32]* @array4, i64 3, i64 0 + %idx15 = getelementptr inbounds [5 x i32], [5 x i32]* @array3, i64 0, i64 0 + %val5 = load i32, i32* %idx15, align 4 + %idx16 = getelementptr inbounds [5 x i32], [5 x i32]* @array1, i64 0, i64 0 + %val6 = load i32, i32* %idx16, align 4 + %idx17 = getelementptr inbounds [5 x i32], [5 x i32]* @array2, i64 0, i64 0 + %val7 = load i32, i32* %idx17, align 4 + %idx18 = getelementptr inbounds [5 x i32], [5 x i32]* @array4, i64 0, i64 0 + %val8 = load i32, i32* %idx18, align 4 + %mul10 = mul i32 %val5, %val6 + %mul11 = mul i32 %val7, %val8 + %add100 = add i32 %mul10, %mul11 + %extract1 = extractelement < 8 x i32> %load2, i32 1 + %extract2 = extractelement < 8 x i32> %load2, i32 2 + %extract3 = extractelement < 8 x i32> %load2, i32 6 + %extract4 = 
extractelement < 8 x i32> %load2, i32 7 + br label %bb2 + +bb2: + %phi1 = phi i32 [ %ld1, %entry ], [ %val2, %bb1 ] + %phi2 = phi i32 [ %val0, %entry ], [ %val3, %bb1 ] + %phi3 = phi i32 [ %val0, %entry ], [ %val4, %bb1 ] + %phi4 = phi i32 [ %val0, %entry ], [ %val5, %bb1 ] + %phi5 = phi i32 [ %val0, %entry ], [ %val6, %bb1 ] + %phi6 = phi i32 [ %val0, %entry ], [ %val7, %bb1 ] + %phi7 = phi i32 [ %val0, %entry ], [ %val8, %bb1 ] + %phi8 = phi i32 [ %val0, %entry ], [ %add100, %bb1 ] + %phi9 = phi i32 [ %val0, %entry ], [ %extract1, %bb1 ] + %phi10 = phi i32 [ %val0, %entry ], [ %extract2, %bb1 ] + %phi11 = phi i32 [ %val0, %entry ], [ %extract3, %bb1 ] + %phi12 = phi i32 [ %val0, %entry ], [ %extract4, %bb1 ] + %phi13 = phi i32 [ %ld2, %entry ], [ %ld1, %bb1 ] + %phi14 = phi i32 [ %val0, %entry ], [ %arg1, %bb1 ] + %phi15 = phi i32 [ %add1, %entry ], [ %ld3, %bb1 ] + %extract5 = extractelement < 8 x i32> %load3, i32 3 + %extract6 = extractelement < 8 x i32> %load3, i32 5 + %res1 = add i32 %phi1, %extract5 + %res2 = mul i32 %res1, %extract6 + %res3 = sub i32 %res2, %val0 + %res4 = sub i32 %res3, %phi2 + %res5 = add i32 %res4, %phi3 + %res6 = sub i32 %res5, %tmp1 + %res7 = mul i32 %res6, %phi4 + %res8 = mul i32 %res7, %phi5 + %res9 = sub i32 %res8, %phi6 + %res10 = add i32 %res9, %phi7 + %res11 = mul i32 %res10, %phi8 + %res12 = add i32 %res11, %phi9 + %res13 = add i32 %res12, %phi10 + %res14 = sub i32 %res13, %phi11 + %res15 = sub i32 %res14, %phi12 + %res16 = mul i32 %res15, %phi13 + %res17 = mul i32 %res16, %phi14 + %res18 = add i32 %res17, %phi15 + %res19 = sub i32 %res18, %arg2 + store i32 %res19, ptr addrspace(1) %p5, align 2 + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/test_ers_spill_loop_livethrough_reg.ll b/llvm/test/CodeGen/AMDGPU/test_ers_spill_loop_livethrough_reg.ll new file mode 100644 index 0000000000000..dcc5bc444785a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/test_ers_spill_loop_livethrough_reg.ll @@ -0,0 +1,240 @@ +; RUN: llc 
-mtriple=amdgcn -mcpu=gfx1200 -enable-next-use-analysis=true -verify-machineinstrs -dump-next-use-distance < %s 2>&1 | FileCheck %s + +; +; bb.0.entry +; | +; bb.1.loop.header<--+ +; / | | +; bb.2.bb1 | | +; \ | | +; bb.5.Flow | +; / | | +; bb.6.bb3 | | +; \ | | +; bb.3.Flow1 | +; / | | +; bb.4.bb2 | | +; \ | | +; bb.7.loop.latch----+ +; | +; bb.8.exit +; +define amdgpu_ps void @test(ptr addrspace(1) %p1, ptr addrspace(1) %p2, ptr addrspace(1) %p3, i32 %TC) { +; CHECK-LABEL: # Machine code for function test: IsSSA, TracksLiveness +; CHECK-NEXT: Function Live Ins: $vgpr0 in [[Reg1:%[0-9]+]], $vgpr1 in [[Reg2:%[0-9]+]], $vgpr2 in [[Reg3:%[0-9]+]], $vgpr3 in [[Reg4:%[0-9]+]], $vgpr4 in [[Reg5:%[0-9]+]], $vgpr5 in [[Reg6:%[0-9]+]], $vgpr6 in [[Reg7:%[0-9]+]] +; EMPTY: +; CHECK: bb.0.entry: +; CHECK-NEXT: successors: %bb.1(0x80000000); %bb.1(100.00%) +; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6 +; CHECK-NEXT: [[Reg7]]:vgpr_32 = COPY killed $vgpr6 +; CHECK-NEXT: [[Reg6]]:vgpr_32 = COPY killed $vgpr5 +; CHECK-NEXT: [[Reg5]]:vgpr_32 = COPY killed $vgpr4 +; CHECK-NEXT: [[Reg4]]:vgpr_32 = COPY killed $vgpr3 +; CHECK-NEXT: [[Reg3]]:vgpr_32 = COPY killed $vgpr2 +; CHECK-NEXT: [[Reg2]]:vgpr_32 = COPY killed $vgpr1 +; CHECK-NEXT: [[Reg1]]:vgpr_32 = COPY killed $vgpr0 +; CHECK-NEXT: [[Reg8:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg5]]:vgpr_32, %subreg.sub0, killed [[Reg6]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg9:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg3]]:vgpr_32, %subreg.sub0, killed [[Reg4]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg10:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg1]]:vgpr_32, %subreg.sub0, killed [[Reg2]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg11:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg10]]:vreg_64, 0, 0, implicit $exec :: (load (s8) from %ir.p1, addrspace 1) +; CHECK-NEXT: [[Reg12:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg10]]:vreg_64, 1, 0, implicit $exec :: (load (s8) from %ir.p1 + 1, addrspace 1) +; 
CHECK-NEXT: [[Reg13:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg12]]:vgpr_32, 8, killed [[Reg11]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg14:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg10]]:vreg_64, 2, 0, implicit $exec :: (load (s8) from %ir.p1 + 2, addrspace 1) +; CHECK-NEXT: [[Reg15:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE killed [[Reg10]]:vreg_64, 3, 0, implicit $exec :: (load (s8) from %ir.p1 + 3, addrspace 1) +; CHECK-NEXT: [[Reg16:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg15]]:vgpr_32, 8, killed [[Reg14]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg17:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg16]]:vgpr_32, 16, killed [[Reg13]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg18:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; EMPTY: +; CHECK: bb.1.loop.header: +; CHECK-NEXT: ; predecessors: %bb.0, %bb.7 +; CHECK-NEXT: successors: %bb.2(0x40000000), %bb.5(0x40000000); %bb.2(50.00%), %bb.5(50.00%) +; EMPTY: +; CHECK: [[Reg19:%[0-9]+]]:sreg_32 = PHI [[Reg18]]:sreg_32, %bb.0, [[Reg20:%[0-9]+]]:sreg_32, %bb.7 +; CHECK-NEXT: [[Reg21:%[0-9]+]]:vreg_64 = PHI undef [[Reg22:%[0-9]+]]:vreg_64, %bb.0, [[Reg23:%[0-9]+]]:vreg_64, %bb.7 +; CHECK-NEXT: [[Reg24:%[0-9]+]]:sreg_32 = PHI [[Reg18]]:sreg_32, %bb.0, [[Reg25:%[0-9]+]]:sreg_32, %bb.7 +; CHECK-NEXT: [[Reg26:%[0-9]+]]:vgpr_32 = PHI [[Reg17]]:vgpr_32, %bb.0, [[Reg27:%[0-9]+]]:vgpr_32, %bb.7 +; CHECK-NEXT: [[Reg28:%[0-9]+]]:sreg_32 = V_CMP_GE_I32_e64 [[Reg24]]:sreg_32, [[Reg17]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg29:%[0-9]+]]:sreg_32 = V_CMP_LT_I32_e64 [[Reg24]]:sreg_32, [[Reg17]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg30:%[0-9]+]]:sreg_32 = SI_IF killed [[Reg29]]:sreg_32, %bb.5, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.2 +; EMPTY: +; CHECK: bb.2.bb1: +; CHECK-NEXT: ; predecessors: %bb.1 +; CHECK-NEXT: successors: %bb.5(0x80000000); %bb.5(100.00%) +; EMPTY: +; CHECK: [[Reg31:%[0-9]+]]:sreg_32_xm0 = S_ASHR_I32 [[Reg24]]:sreg_32, 31, implicit-def dead 
$scc +; CHECK-NEXT: [[Reg32:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[Reg24]]:sreg_32, %subreg.sub0, killed [[Reg31]]:sreg_32_xm0, %subreg.sub1 +; CHECK-NEXT: [[Reg33:%[0-9]+]]:sreg_64 = nsw S_LSHL_B64 killed [[Reg32]]:sreg_64, 2, implicit-def dead $scc +; CHECK-NEXT: [[Reg34:%[0-9]+]]:vgpr_32, [[Reg35:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[Reg9]].sub0:vreg_64, [[Reg33]].sub0:sreg_64, 0, implicit $exec +; CHECK-NEXT: [[Reg36:%[0-9]+]]:vgpr_32, dead $sgpr_null = V_ADDC_U32_e64 killed [[Reg33]].sub1:sreg_64, [[Reg9]].sub1:vreg_64, killed [[Reg35]]:sreg_32_xm0_xexec, 0, implicit $exec +; CHECK-NEXT: [[Reg37:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg34]]:vgpr_32, %subreg.sub0, killed [[Reg36]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg38:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[Reg37]]:vreg_64, 0, 0, implicit $exec :: (load (s32) from %ir.gep, addrspace 1) +; CHECK-NEXT: [[Reg39:%[0-9]+]]:sreg_32 = V_CMP_LE_I32_e64 killed [[Reg38]]:vgpr_32, [[Reg17]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg40:%[0-9]+]]:sreg_32 = COPY $exec_lo +; CHECK-NEXT: [[Reg41:%[0-9]+]]:sreg_32 = S_ANDN2_B32 killed [[Reg28]]:sreg_32, $exec_lo, implicit-def dead $scc +; CHECK-NEXT: [[Reg42:%[0-9]+]]:sreg_32 = S_AND_B32 killed [[Reg39]]:sreg_32, $exec_lo, implicit-def dead $scc +; CHECK-NEXT: [[Reg43:%[0-9]+]]:sreg_32 = S_OR_B32 killed [[Reg41]]:sreg_32, killed [[Reg42]]:sreg_32, implicit-def dead $scc +; CHECK-NEXT: S_BRANCH %bb.5 +; EMPTY: +; CHECK: bb.3.Flow1: +; CHECK-NEXT: ; predecessors: %bb.5, %bb.6 +; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.7(0x40000000); %bb.4(50.00%), %bb.7(50.00%) +; EMPTY: +; CHECK: [[Reg44:%[0-9]+]]:sreg_32 = PHI [[Reg45:%[0-9]+]]:sreg_32, %bb.5, [[Reg46:%[0-9]+]]:sreg_32, %bb.6 +; CHECK-NEXT: [[Reg47:%[0-9]+]]:vgpr_32 = PHI undef [[Reg48:%[0-9]+]]:vgpr_32, %bb.5, [[Reg49:%[0-9]+]]:vgpr_32, %bb.6 +; CHECK-NEXT: SI_END_CF killed [[Reg50:%[0-9]+]]:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: 
[[Reg51:%[0-9]+]]:sreg_32 = SI_IF killed [[Reg44]]:sreg_32, %bb.7, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.4 +; EMPTY: +; CHECK: bb.4.bb2: +; CHECK-NEXT: ; predecessors: %bb.3 +; CHECK-NEXT: successors: %bb.7(0x80000000); %bb.7(100.00%) +; EMPTY: +; CHECK: GLOBAL_STORE_DWORD [[Reg23]]:vreg_64, killed [[Reg26]]:vgpr_32, 0, 0, implicit $exec :: (store (s32) into %ir.9, addrspace 1) +; CHECK-NEXT: [[Reg52:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.7 +; EMPTY: +; CHECK: bb.5.Flow: +; CHECK-NEXT: ; predecessors: %bb.1, %bb.2 +; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.3(0x40000000); %bb.6(50.00%), %bb.3(50.00%) +; EMPTY: +; CHECK: [[Reg53:%[0-9]+]]:sreg_32 = PHI [[Reg28]]:sreg_32, %bb.1, [[Reg43]]:sreg_32, %bb.2 +; CHECK-NEXT: [[Reg45]]:sreg_32 = PHI [[Reg18]]:sreg_32, %bb.1, [[Reg40]]:sreg_32, %bb.2 +; CHECK-NEXT: [[Reg23]]:vreg_64 = PHI [[Reg21]]:vreg_64, %bb.1, [[Reg37]]:vreg_64, %bb.2 +; CHECK-NEXT: SI_END_CF killed [[Reg30]]:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: [[Reg50]]:sreg_32 = SI_IF killed [[Reg53]]:sreg_32, %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.6 +; EMPTY: +; CHECK: bb.6.bb3: +; CHECK-NEXT: ; predecessors: %bb.5 +; CHECK-NEXT: successors: %bb.3(0x80000000); %bb.3(100.00%) +; EMPTY: +; CHECK: [[Reg54:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 31, [[Reg26]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg55:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[Reg26]]:vgpr_32, killed [[Reg54]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg49]]:vgpr_32 = V_ASHRREV_I32_e64 1, killed [[Reg55]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg56:%[0-9]+]]:sreg_32 = S_ANDN2_B32 killed [[Reg45]]:sreg_32, $exec_lo, implicit-def dead $scc +; CHECK-NEXT: [[Reg46]]:sreg_32 = COPY killed [[Reg56]]:sreg_32 +; CHECK-NEXT: S_BRANCH %bb.3 +; EMPTY: +; CHECK: bb.7.loop.latch: +; CHECK-NEXT: ; 
predecessors: %bb.3, %bb.4 +; CHECK-NEXT: successors: %bb.8(0x04000000), %bb.1(0x7c000000); %bb.8(3.12%), %bb.1(96.88%) +; EMPTY: +; CHECK: [[Reg27]]:vgpr_32 = PHI [[Reg47]]:vgpr_32, %bb.3, [[Reg52]]:vgpr_32, %bb.4 +; CHECK-NEXT: SI_END_CF killed [[Reg51]]:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: [[Reg25]]:sreg_32 = S_ADD_I32 killed [[Reg24]]:sreg_32, 1, implicit-def dead $scc +; CHECK-NEXT: [[Reg57:%[0-9]+]]:sreg_32 = V_CMP_GE_U32_e64 [[Reg25]]:sreg_32, [[Reg7]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg20]]:sreg_32 = SI_IF_BREAK killed [[Reg57]]:sreg_32, killed [[Reg19]]:sreg_32, implicit-def dead $scc +; CHECK-NEXT: SI_LOOP [[Reg20]]:sreg_32, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.8 +; EMPTY: +; CHECK: bb.8.exit: +; CHECK-NEXT: ; predecessors: %bb.7 +; EMPTY: +; CHECK: SI_END_CF killed [[Reg20]]:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_DWORD killed [[Reg8]]:vreg_64, killed [[Reg27]]:vgpr_32, 0, 0, implicit $exec :: (store (s32) into %ir.p3, addrspace 1) +; CHECK-NEXT: S_ENDPGM 0 +; EMPTY: +; CHECK: # End machine code for function test. 
+; EMPTY: +; CHECK: Next-use distance of Register [[Reg7]] = 40.0 +; CHECK-NEXT: Next-use distance of Register [[Reg6]] = 6.0 +; CHECK-NEXT: Next-use distance of Register [[Reg5]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg4]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg3]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg2]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg1]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg8]] = 26012.0 +; CHECK-NEXT: Next-use distance of Register [[Reg9]] = 21.0 +; CHECK-NEXT: Next-use distance of Register [[Reg10]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg11]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg12]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg13]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg14]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg15]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg16]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg17]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg18]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg19]] = 23.0 +; CHECK-NEXT: Next-use distance of Register [[Reg21]] = 9.0 +; CHECK-NEXT: Next-use distance of Register [[Reg24]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg26]] = 11.0 +; CHECK-NEXT: Next-use distance of Register [[Reg28]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg29]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg30]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg31]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg32]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg33]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg34]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg35]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg36]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg37]] = 1.0 +; CHECK-NEXT: Next-use distance of 
Register [[Reg38]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg39]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg40]] = 6.0 +; CHECK-NEXT: Next-use distance of Register [[Reg41]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg42]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg43]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg44]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg47]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg51]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg52]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg53]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg45]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg23]] = 9.0 +; CHECK-NEXT: Next-use distance of Register [[Reg50]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg54]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg55]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg49]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg56]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg46]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg27]] = 10.0 +; CHECK-NEXT: Next-use distance of Register [[Reg25]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg57]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg20]] = 1.0 +entry: +; entry +; | +; loop.header<-+ +; | | | +; bb1 | | +; | \ | | +; bb2 bb3 | +; | | | +; loop.latch---+ +; | +; exit + %ld1 = load i32, ptr addrspace(1) %p1, align 1 + br label %loop.header + +loop.header: + %phi.inc = phi i32 [ 0, %entry ], [ %inc, %loop.latch ] + %phi1 = phi i32 [ %ld1, %entry ], [ %phi2, %loop.latch ] + %cond1 = icmp slt i32 %phi.inc, %ld1 + br i1 %cond1, label %bb1, label %bb3 + +bb1: + %sext = sext i32 %phi.inc to i64 + %gep = getelementptr inbounds i32, ptr addrspace(1) %p2, i64 %sext + %ld2 = load i32, ptr addrspace(1) %gep, align 4 + %cond2 = icmp sgt i32 %ld2, %ld1 + br i1 %cond2, label 
%bb2, label %bb3 + +bb2: + store i32 %phi1, ptr addrspace(1) %gep, align 4 + br label %loop.latch + +bb3: + %div = sdiv i32 %phi1, 2 + br label %loop.latch + +loop.latch: + %phi2 = phi i32 [ 1, %bb2 ], [ %div, %bb3 ] + %inc = add i32 %phi.inc, 1 + %cond3 = icmp ult i32 %inc, %TC + br i1 %cond3, label %loop.header, label %exit + +exit: + store i32 %phi2, ptr addrspace(1) %p3, align 4 + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/test_ers_spill_loop_value_in_exit_block.ll b/llvm/test/CodeGen/AMDGPU/test_ers_spill_loop_value_in_exit_block.ll new file mode 100644 index 0000000000000..c1b5b8d0cab6c --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/test_ers_spill_loop_value_in_exit_block.ll @@ -0,0 +1,184 @@ +; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -enable-next-use-analysis=true -verify-machineinstrs -dump-next-use-distance < %s 2>&1 | FileCheck %s + +; Matches the dumped machine code and the per-register next-use distances for a single-block loop whose values (%inc, %add) are used after the loop, in the exit block. +; bb.0.entry +; | +; +<-----+ +; bb.1.loop | +; +------+ +; | +; bb.2.exit +; +define amdgpu_ps void @test(ptr addrspace(1) %p1, ptr addrspace(1) %p2, ptr addrspace(1) %p3, ptr addrspace(1) %p4, ptr addrspace(1) %p5, i32 %TC) { +; CHECK-LABEL: # Machine code for function test: IsSSA, TracksLiveness +; CHECK-NEXT: Function Live Ins: $vgpr0 in [[Reg1:%[0-9]+]], $vgpr1 in [[Reg2:%[0-9]+]], $vgpr2 in [[Reg3:%[0-9]+]], $vgpr3 in [[Reg4:%[0-9]+]], $vgpr4 in [[Reg5:%[0-9]+]], $vgpr5 in [[Reg6:%[0-9]+]], $vgpr6 in [[Reg7:%[0-9]+]], $vgpr7 in [[Reg8:%[0-9]+]], $vgpr8 in [[Reg9:%[0-9]+]], $vgpr9 in [[Reg10:%[0-9]+]], $vgpr10 in [[Reg11:%[0-9]+]] +; EMPTY: +; CHECK: bb.0.entry: +; CHECK-NEXT: successors: %bb.1(0x80000000); %bb.1(100.00%) +; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 +; CHECK-NEXT: [[Reg11]]:vgpr_32 = COPY killed $vgpr10 +; CHECK-NEXT: [[Reg10]]:vgpr_32 = COPY killed $vgpr9 +; CHECK-NEXT: [[Reg9]]:vgpr_32 = COPY killed $vgpr8 +; CHECK-NEXT: [[Reg8]]:vgpr_32 = COPY killed $vgpr7 +; CHECK-NEXT: [[Reg7]]:vgpr_32 = COPY killed $vgpr6 +; CHECK-NEXT: 
[[Reg6]]:vgpr_32 = COPY killed $vgpr5 +; CHECK-NEXT: [[Reg5]]:vgpr_32 = COPY killed $vgpr4 +; CHECK-NEXT: [[Reg4]]:vgpr_32 = COPY killed $vgpr3 +; CHECK-NEXT: [[Reg3]]:vgpr_32 = COPY killed $vgpr2 +; CHECK-NEXT: [[Reg2]]:vgpr_32 = COPY killed $vgpr1 +; CHECK-NEXT: [[Reg1]]:vgpr_32 = COPY killed $vgpr0 +; CHECK-NEXT: [[Reg12:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg9]]:vgpr_32, %subreg.sub0, killed [[Reg10]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg13:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg7]]:vgpr_32, %subreg.sub0, killed [[Reg8]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg14:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg5]]:vgpr_32, %subreg.sub0, killed [[Reg6]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg15:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg1]]:vgpr_32, %subreg.sub0, killed [[Reg2]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg16:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg3]]:vgpr_32, %subreg.sub0, killed [[Reg4]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg17:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[Reg16]]:vreg_64, 0, 0, implicit $exec :: (load (s32) from %ir.p2, addrspace 1) +; CHECK-NEXT: GLOBAL_STORE_DWORD killed [[Reg16]]:vreg_64, [[Reg11]]:vgpr_32, 0, 0, implicit $exec :: (store (s32) into %ir.p2, addrspace 1) +; CHECK-NEXT: [[Reg18:%[0-9]+]]:sreg_32 = S_MOV_B32 -1 +; CHECK-NEXT: [[Reg19:%[0-9]+]]:sreg_32 = S_MOV_B32 0 +; EMPTY: +; CHECK: bb.1.loop: +; CHECK-NEXT: ; predecessors: %bb.0, %bb.1 +; CHECK-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000); %bb.2(3.12%), %bb.1(96.88%) +; EMPTY: +; CHECK: [[Reg20:%[0-9]+]]:sreg_32 = PHI [[Reg19]]:sreg_32, %bb.0, [[Reg21:%[0-9]+]]:sreg_32, %bb.1 +; CHECK-NEXT: [[Reg22:%[0-9]+]]:sreg_32 = PHI [[Reg18]]:sreg_32, %bb.0, [[Reg23:%[0-9]+]]:sreg_32, %bb.1 +; CHECK-NEXT: [[Reg23]]:sreg_32 = S_ADD_I32 [[Reg22]]:sreg_32, 1, implicit-def dead $scc +; CHECK-NEXT: [[Reg24:%[0-9]+]]:sreg_32 = S_ADD_I32 killed [[Reg22]]:sreg_32, 2, implicit-def dead $scc +; CHECK-NEXT: [[Reg25:%[0-9]+]]:sreg_32 = 
V_CMP_GE_U32_e64 [[Reg24]]:sreg_32, [[Reg17]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg21]]:sreg_32 = SI_IF_BREAK killed [[Reg25]]:sreg_32, killed [[Reg20]]:sreg_32, implicit-def dead $scc +; CHECK-NEXT: [[Reg26:%[0-9]+]]:vgpr_32 = COPY [[Reg23]]:sreg_32, implicit $exec +; CHECK-NEXT: [[Reg27:%[0-9]+]]:vgpr_32 = COPY killed [[Reg24]]:sreg_32, implicit $exec +; CHECK-NEXT: SI_LOOP [[Reg21]]:sreg_32, %bb.1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: S_BRANCH %bb.2 +; EMPTY: +; CHECK: bb.2.exit: +; CHECK-NEXT: ; predecessors: %bb.1 +; EMPTY: +; CHECK: SI_END_CF killed [[Reg21]]:sreg_32, implicit-def dead $exec, implicit-def dead $scc, implicit $exec +; CHECK-NEXT: [[Reg28:%[0-9]+]]:vgpr_32 = V_ASHRREV_I32_e64 31, [[Reg26]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg29:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[Reg26]]:vgpr_32, %subreg.sub0, killed [[Reg28]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg30:%[0-9]+]]:vreg_64 = nsw V_LSHLREV_B64_pseudo_e64 2, killed [[Reg29]]:vreg_64, implicit $exec +; CHECK-NEXT: [[Reg31:%[0-9]+]]:vgpr_32, [[Reg32:%[0-9]+]]:sreg_32_xm0_xexec = V_ADD_CO_U32_e64 [[Reg15]].sub0:vreg_64, [[Reg30]].sub0:vreg_64, 0, implicit $exec +; CHECK-NEXT: [[Reg33:%[0-9]+]]:vgpr_32, dead $sgpr_null = V_ADDC_U32_e64 killed [[Reg15]].sub1:vreg_64, killed [[Reg30]].sub1:vreg_64, killed [[Reg32]]:sreg_32_xm0_xexec, 0, implicit $exec +; CHECK-NEXT: [[Reg34:%[0-9]+]]:vreg_64 = REG_SEQUENCE killed [[Reg31]]:vgpr_32, %subreg.sub0, killed [[Reg33]]:vgpr_32, %subreg.sub1 +; CHECK-NEXT: [[Reg35:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg34]]:vreg_64, 0, 0, implicit $exec :: (load (s8) from %ir.gep.le, addrspace 1) +; CHECK-NEXT: [[Reg36:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg34]]:vreg_64, 1, 0, implicit $exec :: (load (s8) from %ir.gep.le + 1, addrspace 1) +; CHECK-NEXT: [[Reg37:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg36]]:vgpr_32, 8, killed [[Reg35]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg38:%[0-9]+]]:vgpr_32 = 
GLOBAL_LOAD_UBYTE [[Reg34]]:vreg_64, 2, 0, implicit $exec :: (load (s8) from %ir.gep.le + 2, addrspace 1) +; CHECK-NEXT: [[Reg39:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE killed [[Reg34]]:vreg_64, 3, 0, implicit $exec :: (load (s8) from %ir.gep.le + 3, addrspace 1) +; CHECK-NEXT: [[Reg40:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg39]]:vgpr_32, 8, killed [[Reg38]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg41:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg40]]:vgpr_32, 16, killed [[Reg37]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg42:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg14]]:vreg_64, 0, 0, implicit $exec :: (load (s8) from %ir.p3, addrspace 1) +; CHECK-NEXT: [[Reg43:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg14]]:vreg_64, 1, 0, implicit $exec :: (load (s8) from %ir.p3 + 1, addrspace 1) +; CHECK-NEXT: [[Reg44:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg43]]:vgpr_32, 8, killed [[Reg42]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg45:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg14]]:vreg_64, 2, 0, implicit $exec :: (load (s8) from %ir.p3 + 2, addrspace 1) +; CHECK-NEXT: [[Reg46:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_UBYTE [[Reg14]]:vreg_64, 3, 0, implicit $exec :: (load (s8) from %ir.p3 + 3, addrspace 1) +; CHECK-NEXT: [[Reg47:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg46]]:vgpr_32, 8, killed [[Reg45]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg48:%[0-9]+]]:vgpr_32 = V_LSHL_OR_B32_e64 killed [[Reg47]]:vgpr_32, 16, killed [[Reg44]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg49:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[Reg13]]:vreg_64, 0, 0, implicit $exec :: (load (s32) from %ir.p4, addrspace 1) +; CHECK-NEXT: [[Reg50:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[Reg49]]:vgpr_32, killed [[Reg27]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg51:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg49]]:vgpr_32, [[Reg50]]:vgpr_32, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_DWORD killed [[Reg14]]:vreg_64, [[Reg51]]:vgpr_32, 0, 0, implicit $exec :: (store (s32) into 
%ir.p3, addrspace 1) +; CHECK-NEXT: [[Reg52:%[0-9]+]]:vgpr_32 = V_SUB_U32_e64 killed [[Reg51]]:vgpr_32, killed [[Reg50]]:vgpr_32, 0, implicit $exec +; CHECK-NEXT: [[Reg53:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg52]]:vgpr_32, killed [[Reg11]]:vgpr_32, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_DWORD killed [[Reg13]]:vreg_64, killed [[Reg53]]:vgpr_32, 0, 0, implicit $exec :: (store (s32) into %ir.p4, addrspace 1) +; CHECK-NEXT: [[Reg54:%[0-9]+]]:vgpr_32 = V_MUL_LO_U32_e64 killed [[Reg17]]:vgpr_32, killed [[Reg48]]:vgpr_32, implicit $exec +; CHECK-NEXT: [[Reg55:%[0-9]+]]:vgpr_32 = V_ADD3_U32_e64 killed [[Reg41]]:vgpr_32, killed [[Reg26]]:vgpr_32, killed [[Reg54]]:vgpr_32, implicit $exec +; CHECK-NEXT: GLOBAL_STORE_DWORD killed [[Reg12]]:vreg_64, killed [[Reg55]]:vgpr_32, 0, 0, implicit $exec :: (store (s32) into %ir.p5, addrspace 1) +; CHECK-NEXT: S_ENDPGM 0 +; EMPTY: +; CHECK: # End machine code for function test. +; EMPTY: +; CHECK: Next-use distance of Register [[Reg11]] = 17.0 +; CHECK-NEXT: Next-use distance of Register [[Reg10]] = 10.0 +; CHECK-NEXT: Next-use distance of Register [[Reg9]] = 9.0 +; CHECK-NEXT: Next-use distance of Register [[Reg8]] = 9.0 +; CHECK-NEXT: Next-use distance of Register [[Reg7]] = 8.0 +; CHECK-NEXT: Next-use distance of Register [[Reg6]] = 8.0 +; CHECK-NEXT: Next-use distance of Register [[Reg5]] = 7.0 +; CHECK-NEXT: Next-use distance of Register [[Reg4]] = 8.0 +; CHECK-NEXT: Next-use distance of Register [[Reg3]] = 7.0 +; CHECK-NEXT: Next-use distance of Register [[Reg2]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg1]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg12]] = 10039.0 +; CHECK-NEXT: Next-use distance of Register [[Reg13]] = 10029.0 +; CHECK-NEXT: Next-use distance of Register [[Reg14]] = 10021.0 +; CHECK-NEXT: Next-use distance of Register [[Reg15]] = 10010.0 +; CHECK-NEXT: Next-use distance of Register [[Reg16]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg17]] = 8.0 +; 
CHECK-NEXT: Next-use distance of Register [[Reg18]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg19]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg20]] = 5.0 +; CHECK-NEXT: Next-use distance of Register [[Reg22]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg23]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg24]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg25]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg21]] = 3.0 +; CHECK-NEXT: Next-use distance of Register [[Reg26]] = 10002.0 +; CHECK-NEXT: Next-use distance of Register [[Reg27]] = 10023.0 +; CHECK-NEXT: Next-use distance of Register [[Reg28]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg29]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg30]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg31]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg32]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg33]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg34]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg35]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg36]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg37]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg38]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg39]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg40]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg41]] = 16.0 +; CHECK-NEXT: Next-use distance of Register [[Reg42]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg43]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg44]] = 4.0 +; CHECK-NEXT: Next-use distance of Register [[Reg45]] = 2.0 +; CHECK-NEXT: Next-use distance of Register [[Reg46]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg47]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg48]] = 8.0 +; CHECK-NEXT: Next-use distance of Register [[Reg49]] = 1.0 +; CHECK-NEXT: Next-use distance of 
Register [[Reg50]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg51]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg52]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg53]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg54]] = 1.0 +; CHECK-NEXT: Next-use distance of Register [[Reg55]] = 1.0 +entry: +; entry +; | +; +<----+ +; loop | +; +-----+ +; | +; exit + %ld1 = load i32, ptr addrspace(1) %p2, align 4 + store i32 %TC, ptr addrspace(1) %p2 + br label %loop + +loop: + %phi = phi i32 [ 100, %entry ], [ %add, %loop ] + %phi.inc = phi i32 [ 0, %entry ], [ %inc, %loop ] + %sext = sext i32 %phi.inc to i64 + %gep = getelementptr inbounds i32, ptr addrspace(1) %p1, i64 %sext + %ld = load i32, ptr addrspace(1) %gep, align 1 + %add = add i32 %ld, %phi.inc + %inc = add i32 %phi.inc, 1 + %cond = icmp ult i32 %inc, %ld1 + br i1 %cond, label %loop, label %exit + +exit: + %ld2 = load i32, ptr addrspace(1) %p3, align 1 + %ld3 = load i32, ptr addrspace(1) %p4 + %add1 = add i32 %ld3, %inc + %mul1 = mul i32 %ld3, %add1 + store i32 %mul1, ptr addrspace(1) %p3 + %sub1 = sub i32 %mul1, %add1 + %mul2 = mul i32 %sub1, %TC + store i32 %mul2, ptr addrspace(1) %p4 + %mul3 = mul i32 %ld1, %ld2 + %add2 = add i32 %mul3, %add + store i32 %add2, ptr addrspace(1) %p5 + ret void +}