diff --git a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h index 03ba952c354ae..9bca74a1d4fc8 100644 --- a/llvm/include/llvm/CodeGen/MachineRegisterInfo.h +++ b/llvm/include/llvm/CodeGen/MachineRegisterInfo.h @@ -752,17 +752,6 @@ class MachineRegisterInfo { Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name = ""); - /// All avilable attributes a virtual register can have. - struct RegisterAttributes { - const RegClassOrRegBank *RCOrRB; - LLT Ty; - }; - - /// createVirtualRegister - Create and return a new virtual register in the - /// function with the specified register attributes. - Register createVirtualRegister(RegisterAttributes RegAttr, - StringRef Name = ""); - /// Create and return a new virtual register in the function with the same /// attributes as the given register. Register cloneVirtualRegister(Register VReg, StringRef Name = ""); diff --git a/llvm/include/llvm/CodeGen/MachineUniformityAnalysis.h b/llvm/include/llvm/CodeGen/MachineUniformityAnalysis.h index 1039ac4e5189b..e6da099751e7a 100644 --- a/llvm/include/llvm/CodeGen/MachineUniformityAnalysis.h +++ b/llvm/include/llvm/CodeGen/MachineUniformityAnalysis.h @@ -32,25 +32,6 @@ MachineUniformityInfo computeMachineUniformityInfo( MachineFunction &F, const MachineCycleInfo &cycleInfo, const MachineDomTree &domTree, bool HasBranchDivergence); -/// Legacy analysis pass which computes a \ref MachineUniformityInfo. -class MachineUniformityAnalysisPass : public MachineFunctionPass { - MachineUniformityInfo UI; - -public: - static char ID; - - MachineUniformityAnalysisPass(); - - MachineUniformityInfo &getUniformityInfo() { return UI; } - const MachineUniformityInfo &getUniformityInfo() const { return UI; } - - bool runOnMachineFunction(MachineFunction &F) override; - void getAnalysisUsage(AnalysisUsage &AU) const override; - void print(raw_ostream &OS, const Module *M = nullptr) const override; - - // TODO: verify analysis -}; - } // namespace llvm #endif // LLVM_CODEGEN_MACHINEUNIFORMITYANALYSIS_H diff --git a/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/llvm/lib/CodeGen/MachineRegisterInfo.cpp index d286128cc89be..087604af6a718 100644 --- a/llvm/lib/CodeGen/MachineRegisterInfo.cpp +++ b/llvm/lib/CodeGen/MachineRegisterInfo.cpp @@ -167,17 +167,6 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass, return Reg; } -/// createVirtualRegister - Create and return a new virtual register in the -/// function with the specified register attributes. -Register MachineRegisterInfo::createVirtualRegister(RegisterAttributes RegAttr, - StringRef Name) { - Register Reg = createIncompleteVirtualRegister(Name); - VRegInfo[Reg].first = *RegAttr.RCOrRB; - setType(Reg, RegAttr.Ty); - noteNewVirtualRegister(Reg); - return Reg; -} - Register MachineRegisterInfo::cloneVirtualRegister(Register VReg, StringRef Name) { Register Reg = createIncompleteVirtualRegister(Name); diff --git a/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp b/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp index 131138e0649e4..3e0fe2b1ba087 100644 --- a/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp +++ b/llvm/lib/CodeGen/MachineUniformityAnalysis.cpp @@ -165,6 +165,25 @@ MachineUniformityInfo llvm::computeMachineUniformityInfo( namespace { +/// Legacy analysis pass which computes a \ref MachineUniformityInfo. 
+class MachineUniformityAnalysisPass : public MachineFunctionPass { + MachineUniformityInfo UI; + +public: + static char ID; + + MachineUniformityAnalysisPass(); + + MachineUniformityInfo &getUniformityInfo() { return UI; } + const MachineUniformityInfo &getUniformityInfo() const { return UI; } + + bool runOnMachineFunction(MachineFunction &F) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + void print(raw_ostream &OS, const Module *M = nullptr) const override; + + // TODO: verify analysis +}; + class MachineUniformityInfoPrinterPass : public MachineFunctionPass { public: static char ID; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp index 4f65a95de82ac..4cd8b1ec1051f 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp @@ -16,11 +16,7 @@ //===----------------------------------------------------------------------===// #include "AMDGPU.h" -#include "SILowerI1Copies.h" -#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineUniformityAnalysis.h" -#include "llvm/InitializePasses.h" #define DEBUG_TYPE "amdgpu-global-isel-divergence-lowering" @@ -46,146 +42,14 @@ class AMDGPUGlobalISelDivergenceLowering : public MachineFunctionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesCFG(); - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } }; -class DivergenceLoweringHelper : public PhiLoweringHelper { -public: - DivergenceLoweringHelper(MachineFunction *MF, MachineDominatorTree *DT, - MachinePostDominatorTree *PDT, - MachineUniformityInfo *MUI); - -private: - MachineUniformityInfo *MUI = nullptr; - MachineIRBuilder B; - Register buildRegCopyToLaneMask(Register Reg); - -public: - void markAsLaneMask(Register DstReg) const override; - void getCandidatesForLowering( - SmallVectorImpl &Vreg1Phis) const override; - void collectIncomingValuesFromPhi( - const MachineInstr *MI, - SmallVectorImpl &Incomings) const override; - void replaceDstReg(Register NewReg, Register OldReg, - MachineBasicBlock *MBB) override; - void buildMergeLaneMasks(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, const DebugLoc &DL, - Register DstReg, Register PrevReg, - Register CurReg) override; - void constrainAsLaneMask(Incoming &In) override; -}; - -DivergenceLoweringHelper::DivergenceLoweringHelper( - MachineFunction *MF, MachineDominatorTree *DT, - MachinePostDominatorTree *PDT, MachineUniformityInfo *MUI) - : PhiLoweringHelper(MF, DT, PDT), MUI(MUI), B(*MF) {} - -// _(s1) -> SReg_32/64(s1) -void DivergenceLoweringHelper::markAsLaneMask(Register DstReg) const { - assert(MRI->getType(DstReg) == LLT::scalar(1)); - - if (MRI->getRegClassOrNull(DstReg)) { - if (MRI->constrainRegClass(DstReg, ST->getBoolRC())) - return; - llvm_unreachable("Failed to constrain register class"); - } - - MRI->setRegClass(DstReg, ST->getBoolRC()); -} - -void DivergenceLoweringHelper::getCandidatesForLowering( - SmallVectorImpl &Vreg1Phis) const { - LLT S1 = LLT::scalar(1); - - // Add divergent i1 phis to the list - for (MachineBasicBlock &MBB : *MF) { - for (MachineInstr &MI : MBB.phis()) { - Register Dst = MI.getOperand(0).getReg(); - if (MRI->getType(Dst) == S1 && MUI->isDivergent(Dst)) - Vreg1Phis.push_back(&MI); - } - } -} - -void 
DivergenceLoweringHelper::collectIncomingValuesFromPhi( - const MachineInstr *MI, SmallVectorImpl &Incomings) const { - for (unsigned i = 1; i < MI->getNumOperands(); i += 2) { - Incomings.emplace_back(MI->getOperand(i).getReg(), - MI->getOperand(i + 1).getMBB(), Register()); - } -} - -void DivergenceLoweringHelper::replaceDstReg(Register NewReg, Register OldReg, - MachineBasicBlock *MBB) { - BuildMI(*MBB, MBB->getFirstNonPHI(), {}, TII->get(AMDGPU::COPY), OldReg) - .addReg(NewReg); -} - -// Copy Reg to new lane mask register, insert a copy after instruction that -// defines Reg while skipping phis if needed. -Register DivergenceLoweringHelper::buildRegCopyToLaneMask(Register Reg) { - Register LaneMask = createLaneMaskReg(MRI, LaneMaskRegAttrs); - MachineInstr *Instr = MRI->getVRegDef(Reg); - MachineBasicBlock *MBB = Instr->getParent(); - B.setInsertPt(*MBB, MBB->SkipPHIsAndLabels(std::next(Instr->getIterator()))); - B.buildCopy(LaneMask, Reg); - return LaneMask; -} - -// bb.previous -// %PrevReg = ... -// -// bb.current -// %CurReg = ... -// -// %DstReg - not defined -// -// -> (wave32 example, new registers have sreg_32 reg class and S1 LLT) -// -// bb.previous -// %PrevReg = ... -// %PrevRegCopy:sreg_32(s1) = COPY %PrevReg -// -// bb.current -// %CurReg = ... -// %CurRegCopy:sreg_32(s1) = COPY %CurReg -// ... -// %PrevMaskedReg:sreg_32(s1) = ANDN2 %PrevRegCopy, ExecReg - active lanes 0 -// %CurMaskedReg:sreg_32(s1) = AND %ExecReg, CurRegCopy - inactive lanes to 0 -// %DstReg:sreg_32(s1) = OR %PrevMaskedReg, CurMaskedReg -// -// DstReg = for active lanes rewrite bit in PrevReg with bit from CurReg -void DivergenceLoweringHelper::buildMergeLaneMasks( - MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, - Register DstReg, Register PrevReg, Register CurReg) { - // DstReg = (PrevReg & !EXEC) | (CurReg & EXEC) - // TODO: check if inputs are constants or results of a compare. - - Register PrevRegCopy = buildRegCopyToLaneMask(PrevReg); - Register CurRegCopy = buildRegCopyToLaneMask(CurReg); - Register PrevMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs); - Register CurMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs); - - B.setInsertPt(MBB, I); - B.buildInstr(AndN2Op, {PrevMaskedReg}, {PrevRegCopy, ExecReg}); - B.buildInstr(AndOp, {CurMaskedReg}, {ExecReg, CurRegCopy}); - B.buildInstr(OrOp, {DstReg}, {PrevMaskedReg, CurMaskedReg}); -} - -void DivergenceLoweringHelper::constrainAsLaneMask(Incoming &In) { return; } - } // End anonymous namespace. 
INITIALIZE_PASS_BEGIN(AMDGPUGlobalISelDivergenceLowering, DEBUG_TYPE, "AMDGPU GlobalISel divergence lowering", false, false) -INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) -INITIALIZE_PASS_DEPENDENCY(MachineUniformityAnalysisPass) INITIALIZE_PASS_END(AMDGPUGlobalISelDivergenceLowering, DEBUG_TYPE, "AMDGPU GlobalISel divergence lowering", false, false) @@ -200,12 +64,5 @@ FunctionPass *llvm::createAMDGPUGlobalISelDivergenceLoweringPass() { bool AMDGPUGlobalISelDivergenceLowering::runOnMachineFunction( MachineFunction &MF) { - MachineDominatorTree &DT = getAnalysis(); - MachinePostDominatorTree &PDT = getAnalysis(); - MachineUniformityInfo &MUI = - getAnalysis().getUniformityInfo(); - - DivergenceLoweringHelper Helper(&MF, &DT, &PDT, &MUI); - - return Helper.lowerPhis(); + return false; } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 610cedd4ea6fc..fdee74d58d269 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -210,7 +210,6 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const { bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const { const Register DefReg = I.getOperand(0).getReg(); const LLT DefTy = MRI->getType(DefReg); - if (DefTy == LLT::scalar(1)) { if (!AllowRiskySelect) { LLVM_DEBUG(dbgs() << "Skipping risky boolean phi\n"); @@ -3553,6 +3552,8 @@ bool AMDGPUInstructionSelector::selectStackRestore(MachineInstr &MI) const { } bool AMDGPUInstructionSelector::select(MachineInstr &I) { + if (I.isPHI()) + return selectPHI(I); if (!I.isPreISelOpcode()) { if (I.isCopy()) @@ -3695,8 +3696,6 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) { return selectWaveAddress(I); case AMDGPU::G_STACKRESTORE: return selectStackRestore(I); - case AMDGPU::G_PHI: - return selectPHI(I); default: return selectImpl(I, *CoverageInfo); } diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp index 59843438950ac..cfa0c21def791 100644 --- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp @@ -31,9 +31,9 @@ using namespace llvm; -static Register -insertUndefLaneMask(MachineBasicBlock *MBB, MachineRegisterInfo *MRI, - MachineRegisterInfo::RegisterAttributes LaneMaskRegAttrs); +static Register insertUndefLaneMask(MachineBasicBlock *MBB, + MachineRegisterInfo *MRI, + Register LaneMaskRegAttrs); namespace { @@ -78,7 +78,7 @@ class Vreg1LoweringHelper : public PhiLoweringHelper { MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, Register PrevReg, Register CurReg) override; - void constrainAsLaneMask(Incoming &In) override; + void constrainIncomingRegisterTakenAsIs(Incoming &In) override; bool lowerCopiesFromI1(); bool lowerCopiesToI1(); @@ -304,8 +304,7 @@ class LoopFinder { /// blocks, so that the SSA updater doesn't have to search all the way to the /// function entry. 
void addLoopEntries(unsigned LoopLevel, MachineSSAUpdater &SSAUpdater, - MachineRegisterInfo &MRI, - MachineRegisterInfo::RegisterAttributes LaneMaskRegAttrs, + MachineRegisterInfo &MRI, Register LaneMaskRegAttrs, ArrayRef Incomings = {}) { assert(LoopLevel < CommonDominators.size()); @@ -412,15 +411,14 @@ FunctionPass *llvm::createSILowerI1CopiesPass() { return new SILowerI1Copies(); } -Register llvm::createLaneMaskReg( - MachineRegisterInfo *MRI, - MachineRegisterInfo::RegisterAttributes LaneMaskRegAttrs) { - return MRI->createVirtualRegister(LaneMaskRegAttrs); +Register llvm::createLaneMaskReg(MachineRegisterInfo *MRI, + Register LaneMaskRegAttrs) { + return MRI->cloneVirtualRegister(LaneMaskRegAttrs); } -static Register -insertUndefLaneMask(MachineBasicBlock *MBB, MachineRegisterInfo *MRI, - MachineRegisterInfo::RegisterAttributes LaneMaskRegAttrs) { +static Register insertUndefLaneMask(MachineBasicBlock *MBB, + MachineRegisterInfo *MRI, + Register LaneMaskRegAttrs) { MachineFunction &MF = *MBB->getParent(); const GCNSubtarget &ST = MF.getSubtarget(); const SIInstrInfo *TII = ST.getInstrInfo(); @@ -621,7 +619,7 @@ bool PhiLoweringHelper::lowerPhis() { for (auto &Incoming : Incomings) { MachineBasicBlock &IMBB = *Incoming.Block; if (PIA.isSource(IMBB)) { - constrainAsLaneMask(Incoming); + constrainIncomingRegisterTakenAsIs(Incoming); SSAUpdater.AddAvailableValue(&IMBB, Incoming.Reg); } else { Incoming.UpdatedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs); @@ -913,4 +911,6 @@ void Vreg1LoweringHelper::buildMergeLaneMasks(MachineBasicBlock &MBB, } } -void Vreg1LoweringHelper::constrainAsLaneMask(Incoming &In) {} +void Vreg1LoweringHelper::constrainIncomingRegisterTakenAsIs(Incoming &In) { + return; +} diff --git a/llvm/lib/Target/AMDGPU/SILowerI1Copies.h b/llvm/lib/Target/AMDGPU/SILowerI1Copies.h index 80ea0536b4cbf..5099d39c2d141 100644 --- a/llvm/lib/Target/AMDGPU/SILowerI1Copies.h +++ b/llvm/lib/Target/AMDGPU/SILowerI1Copies.h @@ -31,9 +31,7 @@ struct Incoming { : Reg(Reg), Block(Block), UpdatedReg(UpdatedReg) {} }; -Register -createLaneMaskReg(MachineRegisterInfo *MRI, - MachineRegisterInfo::RegisterAttributes LaneMaskRegAttrs); +Register createLaneMaskReg(MachineRegisterInfo *MRI, Register LaneMaskRegAttrs); class PhiLoweringHelper { public: @@ -49,7 +47,7 @@ class PhiLoweringHelper { MachineRegisterInfo *MRI = nullptr; const GCNSubtarget *ST = nullptr; const SIInstrInfo *TII = nullptr; - MachineRegisterInfo::RegisterAttributes LaneMaskRegAttrs; + Register LaneMaskRegAttrs; #ifndef NDEBUG DenseSet PhiRegisters; @@ -70,8 +68,7 @@ class PhiLoweringHelper { getSaluInsertionAtEnd(MachineBasicBlock &MBB) const; void initializeLaneMaskRegisterAttributes(Register LaneMask) { - LaneMaskRegAttrs.RCOrRB = &MRI->getRegClassOrRegBank(LaneMask); - LaneMaskRegAttrs.Ty = MRI->getType(LaneMask); + LaneMaskRegAttrs = LaneMask; } bool isLaneMaskReg(Register Reg) const { @@ -94,7 +91,7 @@ class PhiLoweringHelper { MachineBasicBlock::iterator I, const DebugLoc &DL, Register DstReg, Register PrevReg, Register CurReg) = 0; - virtual void constrainAsLaneMask(Incoming &In) = 0; + virtual void constrainIncomingRegisterTakenAsIs(Incoming &In) = 0; }; } // end namespace llvm diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll index 06a8f80e6aa34..7a68aec1a1c55 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll +++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -; RUN: llc -global-isel -amdgpu-global-isel-risky-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s -; REQUIRES: do-not-run-me +; RUN: llc -global-isel -amdgpu-global-isel-risky-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s ; Divergent phis that don't require lowering using lane mask merging diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir index 55f22b0bbb4df..d314ebe355f51 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.mir @@ -1,9 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 -# RUN: llc -global-isel -mtriple=amdgcn-mesa-amdpal -mcpu=gfx1010 -run-pass=amdgpu-global-isel-divergence-lowering -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX10 %s - -# Test is updated but copies between S1-register-with-reg-class and -# register-with-reg-class-no-LLT fail machine verification -# REQUIRES: do-not-run-me-with-machine-verifier +# RUN: llc -global-isel -mtriple=amdgcn-mesa-amdpal -mcpu=gfx1010 -run-pass=amdgpu-global-isel-divergence-lowering %s -o - | FileCheck -check-prefix=GFX10 %s --- | define void @divergent_i1_phi_uniform_branch() {ret void} @@ -50,7 +46,7 @@ body: | ; GFX10-NEXT: bb.2: ; GFX10-NEXT: successors: %bb.4(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32(s1) = G_PHI %14(s1), %bb.3, [[ICMP]](s1), %bb.0 + ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s1) = G_PHI %14(s1), %bb.3, [[ICMP]](s1), %bb.0 ; GFX10-NEXT: G_BR %bb.4 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.3: @@ -130,7 +126,6 @@ body: | ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $sgpr0 ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY2]](s32), [[C]] - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP]](s1) ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY3]](s32), [[C1]] ; GFX10-NEXT: G_BRCOND [[ICMP1]](s1), %bb.2 @@ -141,17 +136,12 @@ body: | ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY2]](s32), [[C2]] - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY4]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY5]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.2: - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[ICMP]](s1), %bb.0, [[S_OR_B32_]](s1), %bb.1 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]] + ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP2]](s1), %bb.1 ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: 
[[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY6]](s1), [[C4]], [[C3]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[PHI]](s1), [[C4]], [[C3]] ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) ; GFX10-NEXT: S_ENDPGM 0 bb.0: @@ -201,37 +191,30 @@ body: | ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) ; GFX10-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[DEF]](s1), %bb.0, %22(s1), %bb.1 - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C1]](s32), %bb.0 - ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %9(s32), %bb.1 - ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[C]](s1), %bb.0, %11(s1), %bb.1 - ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]] + ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C1]](s32), %bb.0 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %9(s32), %bb.1 + ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s1) = G_PHI [[C]](s1), %bb.0, %11(s1), %bb.1 ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI3]], [[C2]] - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1) - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI2]](s32) + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI2]], [[C2]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI1]](s32) ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]] ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C3]] - ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI1]](s32) - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY3]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY4]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C3]] + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI]](s32) ; GFX10-NEXT: SI_LOOP [[INTRINSIC_CONVERGENT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.2: + ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[XOR]](s1), %bb.1 ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.1 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](s32) ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY5]](s1), [[C5]], [[C4]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[PHI3]](s1), [[C5]], [[C4]] ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32)) ; GFX10-NEXT: SI_RETURN bb.0: @@ -296,28 +279,25 @@ body: | ; GFX10-NEXT: 
[[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY1]](s32), [[C1]] - ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.4(0x40000000), %bb.2(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[DEF]](s1), %bb.0, %39(s1), %bb.5 - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %15(s32), %bb.5, [[C]](s32), %bb.0 - ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %17(s32), %bb.5 - ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = G_PHI [[FCMP]](s1), %bb.0, %19(s1), %bb.5 - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]] + ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %15(s32), %bb.5, [[C]](s32), %bb.0 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %17(s32), %bb.5 + ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s1) = G_PHI [[FCMP]](s1), %bb.0, %19(s1), %bb.5 ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1000 - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[PHI2]](s32), [[C3]] + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sle), [[PHI1]](s32), [[C3]] ; GFX10-NEXT: G_BRCOND [[ICMP]](s1), %bb.4 ; GFX10-NEXT: G_BR %bb.2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.2: ; GFX10-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s1) = G_PHI %24(s1), %bb.4, [[C2]](s1), %bb.1 + ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI %24(s1), %bb.4, [[C2]](s1), %bb.1 ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI4]], [[C4]] + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI3]], [[C4]] ; GFX10-NEXT: G_BRCOND [[XOR]](s1), %bb.5 ; GFX10-NEXT: G_BR %bb.3 ; GFX10-NEXT: {{ $}} @@ -340,26 +320,22 @@ body: | ; GFX10-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000) ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[C8:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[PHI3]], [[C8]] - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[XOR1]](s1) - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI2]](s32) + ; GFX10-NEXT: [[XOR1:%[0-9]+]]:_(s1) = G_XOR [[PHI2]], [[C8]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI1]](s32) ; GFX10-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]] ; GFX10-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C9]] - ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI1]](s32) - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY9]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C9]] + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI]](s32) ; GFX10-NEXT: SI_LOOP [[INTRINSIC_CONVERGENT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.6 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.6: + ; GFX10-NEXT: 
[[PHI4:%[0-9]+]]:_(s1) = G_PHI [[XOR1]](s1), %bb.5 ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.5 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI5]](s32) ; GFX10-NEXT: [[C10:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 ; GFX10-NEXT: [[C11:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY10]](s1), [[C11]], [[C10]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[PHI4]](s1), [[C11]], [[C10]] ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32)) ; GFX10-NEXT: SI_RETURN bb.0: @@ -492,7 +468,7 @@ body: | ; GFX10-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000) ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %30(s32), %bb.4, [[DEF]](s32), %bb.0 - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32(s1) = G_PHI %32(s1), %bb.4, [[C5]](s1), %bb.0 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s1) = G_PHI %32(s1), %bb.4, [[C5]](s1), %bb.0 ; GFX10-NEXT: G_BRCOND [[PHI1]](s1), %bb.5 ; GFX10-NEXT: G_BR %bb.6 ; GFX10-NEXT: {{ $}} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll index 808c2c2ded201..34dedfe10365f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -amdgpu-global-isel-risky-select -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s -; REQUIRES: do-not-run-me +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -amdgpu-global-isel-risky-select -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s ; This file contains various tests that have divergent i1s used outside of ; the loop. 
These are lane masks is sgpr and need to have correct value in diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.mir index d84ccece81c78..92463714ec694 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-used-outside-loop.mir @@ -1,9 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 -# RUN: llc -global-isel -mtriple=amdgcn-mesa-amdpal -mcpu=gfx1010 -run-pass=amdgpu-global-isel-divergence-lowering -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX10 %s - -# Test is updated but copies between S1-register-with-reg-class and -# register-with-reg-class-no-LLT fail machine verification -# REQUIRES: do-not-run-me-with-machine-verifier +# RUN: llc -global-isel -mtriple=amdgcn-mesa-amdpal -mcpu=gfx1010 -run-pass=amdgpu-global-isel-divergence-lowering %s -o - | FileCheck -check-prefix=GFX10 %s --- name: divergent_i1_phi_used_outside_loop @@ -23,49 +19,30 @@ body: | ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY1]](s32), [[C1]] - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[FCMP]](s1) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32 = IMPLICIT_DEF - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[DEF]] - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY5]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY4]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc - ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[DEF1]](s1), %bb.0, %36(s1), %bb.1 - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32 = PHI [[S_OR_B32_]](s1), %bb.0, %24(s1), %bb.1 - ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %9(s32), %bb.1, [[C]](s32), %bb.0 - ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %11(s32), %bb.1 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]] - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]] - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[COPY7]](s1) - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]] + ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %9(s32), %bb.1, [[C]](s32), %bb.0 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %11(s32), %bb.1 + ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s1) = G_PHI [[FCMP]](s1), %bb.0, %13(s1), %bb.1 ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[COPY7]], [[C2]] - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1) - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI3]](s32) + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI2]], [[C2]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI1]](s32) ; GFX10-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]] ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI3]], [[C3]] - ; GFX10-NEXT: 
[[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI2]](s32) - ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY8]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C3]] + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI]](s32) ; GFX10-NEXT: SI_LOOP [[INTRINSIC_CONVERGENT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.2: + ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[PHI2]](s1), %bb.1 ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.1 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_2]](s1) ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](s32) ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY11]](s1), [[C5]], [[C4]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[PHI3]](s1), [[C5]], [[C4]] ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32)) ; GFX10-NEXT: SI_RETURN bb.0: @@ -126,67 +103,42 @@ body: | ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C1]](s1) - ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[DEF]] - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY5]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY4]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc - ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[DEF1]](s1), %bb.0, %41, %bb.3 - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[S_OR_B32_]](s1), %bb.0, %27(s1), %bb.3 - ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %9(s32), %bb.3 - ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(p1) = G_PHI [[MV]](p1), %bb.0, %11(p1), %bb.3 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]] - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI1]] - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[COPY7]](s1) - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI1]] - 
; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY8]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_1]](s1) - ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY7]](s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %9(s32), %bb.3 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(p1) = G_PHI [[MV]](p1), %bb.0, %11(p1), %bb.3 + ; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_PHI [[C1]](s1), %bb.0, %13(s1), %bb.3 + ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[PHI2]](s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.2: ; GFX10-NEXT: successors: %bb.3(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PHI3]](p1) :: (load (s32), addrspace 1) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PHI1]](p1) :: (load (s32), addrspace 1) ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[LOAD]](s32), [[C2]] - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.3: ; GFX10-NEXT: successors: %bb.4(0x04000000), %bb.1(0x7c000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI4:%[0-9]+]]:sreg_32 = PHI [[S_OR_B32_1]](s1), %bb.1, [[S_OR_B32_2]](s1), %bb.2 - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[PHI4]] - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[COPY12]](s1) + ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[ICMP]](s1), %bb.2, [[PHI2]](s1), %bb.1 ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 - ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[PHI3]], [[C3]](s64) + ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[PHI1]], [[C3]](s64) ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = nsw G_ADD [[PHI2]], [[C4]] + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = nsw G_ADD [[PHI]], [[C4]] ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sge), [[ADD]](s32), [[C5]] - ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY13]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc ; GFX10-NEXT: G_BRCOND [[ICMP1]](s1), %bb.1 ; GFX10-NEXT: G_BR %bb.4 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.4: - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[COPY7]](s1) + ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s1) = G_PHI [[PHI2]](s1), %bb.3 ; GFX10-NEXT: 
[[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY14]](s1), [[C7]], [[C6]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[PHI4]](s1), [[C7]], [[C6]] ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV1]](p0) :: (store (s32)) ; GFX10-NEXT: SI_RETURN bb.0: @@ -259,37 +211,30 @@ body: | ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[COPY1]](s32), [[C1]] - ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[DEF]](s1), %bb.0, %24(s1), %bb.1 - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %9(s32), %bb.1, [[C]](s32), %bb.0 - ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %11(s32), %bb.1 - ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32(s1) = G_PHI [[FCMP]](s1), %bb.0, %13(s1), %bb.1 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]] + ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %9(s32), %bb.1, [[C]](s32), %bb.0 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %11(s32), %bb.1 + ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s1) = G_PHI [[FCMP]](s1), %bb.0, %13(s1), %bb.1 ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI3]], [[C2]] - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1) - ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI2]](s32) + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI2]], [[C2]] + ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI1]](s32) ; GFX10-NEXT: [[FCMP1:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]] ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C3]] - ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI1]](s32) - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY4]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY5]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C3]] + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[FCMP1]](s1), [[PHI]](s32) ; GFX10-NEXT: SI_LOOP [[INTRINSIC_CONVERGENT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.2: + ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[XOR]](s1), %bb.1 ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.1 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](s32) ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY6]](s1), [[C5]], [[C4]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[PHI3]](s1), 
[[C5]], [[C4]] ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32)) ; GFX10-NEXT: SI_RETURN bb.0: @@ -353,7 +298,6 @@ body: | ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C1]](s1) ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.1 ; GFX10-NEXT: {{ $}} @@ -361,49 +305,29 @@ body: | ; GFX10-NEXT: successors: %bb.3(0x80000000) ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF - ; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF - ; GFX10-NEXT: [[DEF3:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF ; GFX10-NEXT: G_BR %bb.3 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.2: ; GFX10-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[C1]](s1), %bb.0, %39(s1), %bb.8 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI]] + ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_PHI %14(s1), %bb.8, [[C1]](s1), %bb.0 ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) - ; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY6]](s1), %bb.6, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[PHI]](s1), %bb.6, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.5 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.3: ; GFX10-NEXT: successors: %bb.4(0x40000000), %bb.7(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32 = PHI [[DEF3]](s1), %bb.1, %72(s1), %bb.7 - ; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32 = PHI [[DEF2]](s1), %bb.1, %61, %bb.7 - ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32 = PHI [[DEF1]](s1), %bb.1, %48, %bb.7 - ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[C2]](s32), %bb.1, %17(s32), %bb.7 - ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI %19(s32), %bb.7, [[C2]](s32), %bb.1 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]] - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[PHI2]] - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]] + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C2]](s32), %bb.1, %17(s32), %bb.7 + ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %19(s32), %bb.7, [[C2]](s32), %bb.1 ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1) - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1) - ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI5]](s32) + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI2]](s32) ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C4]](s32) ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL]](s64) ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1) ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C5]] - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = 
S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_1]](s1) ; GFX10-NEXT: [[SI_IF2:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP1]](s1), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.4 ; GFX10-NEXT: {{ $}} @@ -411,17 +335,9 @@ body: | ; GFX10-NEXT: successors: %bb.7(0x80000000) ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s1) = G_CONSTANT i1 false - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[C6]](s1) ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI5]], [[C7]] - ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[PHI5]](s32), [[COPY]] - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY12]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY14]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY13]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY15]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C7]] + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[PHI2]](s32), [[COPY]] ; GFX10-NEXT: G_BR %bb.7 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.5: @@ -437,32 +353,22 @@ body: | ; GFX10-NEXT: bb.7: ; GFX10-NEXT: successors: %bb.8(0x04000000), %bb.3(0x7c000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32 = PHI [[S_OR_B32_1]](s1), %bb.3, [[S_OR_B32_3]](s1), %bb.4 - ; GFX10-NEXT: [[PHI7:%[0-9]+]]:sreg_32 = PHI [[S_OR_B32_]](s1), %bb.3, [[S_OR_B32_2]](s1), %bb.4 - ; GFX10-NEXT: [[PHI8:%[0-9]+]]:_(s32) = G_PHI [[ADD]](s32), %bb.4, [[DEF]](s32), %bb.3 - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[PHI6]] - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32(s1) = COPY [[PHI7]] + ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[ADD]](s32), %bb.4, [[DEF]](s32), %bb.3 + ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s1) = G_PHI [[C6]](s1), %bb.4, [[C3]](s1), %bb.3 + ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s1) = G_PHI [[ICMP2]](s1), %bb.4, [[C3]](s1), %bb.3 ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF2]](s32) ; GFX10-NEXT: [[C9:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[COPY17]], [[C9]] - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1) - ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[COPY16]](s1), [[PHI4]](s32) - ; GFX10-NEXT: [[S_ANDN2_B32_4:%[0-9]+]]:sreg_32(s1) 
= S_ANDN2_B32 [[COPY7]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_4:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY18]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_4:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_4]](s1), [[S_AND_B32_4]](s1), implicit-def $scc + ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI4]], [[C9]] + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[PHI5]](s1), [[PHI1]](s32) ; GFX10-NEXT: SI_LOOP [[INTRINSIC_CONVERGENT]](s32), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.8 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.8: ; GFX10-NEXT: successors: %bb.2(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI9:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.7 - ; GFX10-NEXT: [[COPY19:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_4]](s1) - ; GFX10-NEXT: [[COPY20:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[COPY19]](s1) - ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI9]](s32) - ; GFX10-NEXT: [[S_ANDN2_B32_5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY5]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY20]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_5]](s1), [[S_AND_B32_5]](s1), implicit-def $scc + ; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s1) = G_PHI [[XOR]](s1), %bb.7 + ; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.7 + ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](s32) ; GFX10-NEXT: G_BR %bb.2 bb.0: successors: %bb.1(0x40000000), %bb.2(0x40000000) @@ -573,40 +479,35 @@ body: | ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.2(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[DEF1]](s1), %bb.0, %38(s1), %bb.6 - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %11(s32), %bb.6, [[C]](s32), %bb.0 - ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %13(s32), %bb.6 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI]] + ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %11(s32), %bb.6, [[C]](s32), %bb.0 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %13(s32), %bb.6 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.2: ; GFX10-NEXT: successors: %bb.3(0x40000000), %bb.4(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[PHI2]] - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[ICMP]](s1) + ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[PHI1]] ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.3 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.3: ; GFX10-NEXT: successors: %bb.4(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI2]](s32) + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI1]](s32) ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; 
GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C1]](s32) ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL]](s64) - ; GFX10-NEXT: G_STORE [[PHI2]](s32), [[PTR_ADD]](p1) :: (store (s32), addrspace 1) + ; GFX10-NEXT: G_STORE [[PHI1]](s32), [[PTR_ADD]](p1) :: (store (s32), addrspace 1) ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.4: ; GFX10-NEXT: successors: %bb.5(0x40000000), %bb.6(0x40000000) ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[C2]](s1) ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[PHI2]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[PHI1]] ; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP1]](s1), %bb.6, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.5 ; GFX10-NEXT: {{ $}} @@ -614,35 +515,27 @@ body: | ; GFX10-NEXT: successors: %bb.6(0x80000000) ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 false - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1) ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C4]] - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY9]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C4]] ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.6: ; GFX10-NEXT: successors: %bb.7(0x04000000), %bb.1(0x7c000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32 = PHI [[C2]](s1), %bb.4, [[S_OR_B32_]](s1), %bb.5 - ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[ADD]](s32), %bb.5, [[DEF]](s32), %bb.4 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]] + ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[ADD]](s32), %bb.5, [[DEF]](s32), %bb.4 + ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[C3]](s1), %bb.5, [[C2]](s1), %bb.4 ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF1]](s32) - ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[COPY10]](s1), [[PHI1]](s32) - ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY7]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[PHI3]](s1), [[PHI]](s32) ; GFX10-NEXT: SI_LOOP [[INTRINSIC_CONVERGENT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.7 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.7: ; GFX10-NEXT: successors: %bb.8(0x40000000), %bb.9(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.6 - ; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI [[PHI2]](s32), %bb.6 - ; GFX10-NEXT: 
[[COPY11:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[S_OR_B32_1]](s1) - ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI5]](s32) - ; GFX10-NEXT: [[SI_IF2:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY11]](s1), %bb.9, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.6 + ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_PHI [[ICMP]](s1), %bb.6 + ; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI [[PHI1]](s32), %bb.6 + ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](s32) + ; GFX10-NEXT: [[SI_IF2:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[PHI5]](s1), %bb.9, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.8 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.8: @@ -755,75 +648,47 @@ body: | ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY3]](s32), [[COPY4]](s32) ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF - ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[DEF1]](s1), %bb.0, %53(s1), %bb.3 - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32 = PHI [[DEF]](s1), %bb.0, %42, %bb.3 - ; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[C1]](s1), %bb.0, %32(s1), %bb.3 - ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI %10(s32), %bb.3, [[C]](s32), %bb.0 - ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %12(s32), %bb.3 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]] - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]] - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI2]] - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[COPY7]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY8]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) - ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY7]](s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %10(s32), %bb.3, [[C]](s32), %bb.0 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %12(s32), %bb.3 + ; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_PHI [[C1]](s1), %bb.0, %14(s1), %bb.3 + ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[PHI2]](s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.2 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.2: ; GFX10-NEXT: successors: %bb.3(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI4]](s32) + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI1]](s32) ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C2]](s32) ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL]](s64) ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1) ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; 
GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[LOAD]](s32), [[C3]] - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP]](s1) - ; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF - ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.3: ; GFX10-NEXT: successors: %bb.4(0x04000000), %bb.1(0x7c000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32 = PHI [[S_OR_B32_]](s1), %bb.1, [[S_OR_B32_1]](s1), %bb.2 - ; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[PHI2]], %bb.1, [[DEF2]](s1), %bb.2 - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]] - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI6]] + ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[ICMP]](s1), %bb.2, [[PHI2]](s1), %bb.1 ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) - ; GFX10-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE [[COPY11]] - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[FREEZE]](s1) - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[FREEZE]](s1) + ; GFX10-NEXT: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE [[PHI3]] ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI4]], [[C4]] - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[PHI4]](s32), [[COPY]] - ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[ICMP1]](s1), [[PHI3]](s32) - ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY12]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY14]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY5]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY13]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc + ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C4]] + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[PHI1]](s32), [[COPY]] + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[ICMP1]](s1), [[PHI]](s32) ; GFX10-NEXT: SI_LOOP [[INTRINSIC_CONVERGENT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.4 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.4: - ; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.3 - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_3]](s1) - ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](s32) + ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s1) = G_PHI [[FREEZE]](s1), %bb.3 + ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.3 + ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), 
[[PHI5]](s32) ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00 ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY15]](s1), [[C6]], [[C5]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[PHI4]](s1), [[C6]], [[C5]] ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV1]](p0) :: (store (s32)) ; GFX10-NEXT: S_ENDPGM 0 bb.0: @@ -905,39 +770,20 @@ body: | ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF - ; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF - ; GFX10-NEXT: [[DEF3:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[DEF3]](s1), %bb.0, %67(s1), %bb.5 - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32 = PHI [[DEF2]](s1), %bb.0, %56, %bb.5 - ; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32 = PHI [[DEF1]](s1), %bb.0, %43, %bb.5 - ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI %12(s32), %bb.5, [[C]](s32), %bb.0 - ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %14(s32), %bb.5 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI]] - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]] - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[PHI2]] + ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %12(s32), %bb.5, [[C]](s32), %bb.0 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %14(s32), %bb.5 ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1) - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1) - ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI4]](s32) + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI1]](s32) ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C2]](s32) ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64) ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1) ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C3]] - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY7]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY9]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_1]](s1) ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.3 ; GFX10-NEXT: {{ $}} @@ -952,7 +798,6 @@ body: | ; GFX10-NEXT: successors: 
%bb.5(0x80000000) ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s1) = G_CONSTANT i1 false - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[C5]](s1) ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C6]](s32) ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL1]](s64) @@ -960,16 +805,9 @@ body: | ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD1]], [[C7]] ; GFX10-NEXT: G_STORE [[ADD]](s32), [[PTR_ADD1]](p1) :: (store (s32), addrspace 1) - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI4]], [[C7]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C7]] ; GFX10-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 100 - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI4]](s32), [[C8]] - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP1]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY11]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY13]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY12]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY14]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI1]](s32), [[C8]] ; GFX10-NEXT: G_BR %bb.5 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.4: @@ -979,27 +817,21 @@ body: | ; GFX10-NEXT: bb.5: ; GFX10-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32 = PHI [[S_OR_B32_1]](s1), %bb.1, [[S_OR_B32_3]](s1), %bb.3 - ; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32 = PHI [[S_OR_B32_]](s1), %bb.1, [[S_OR_B32_2]](s1), %bb.3 - ; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.3, [[DEF]](s32), %bb.1 - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]] - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[PHI6]] - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[COPY16]](s1) + ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.3, [[DEF]](s32), %bb.1 + ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[C5]](s1), %bb.3, [[C1]](s1), %bb.1 + ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s1) = G_PHI [[ICMP1]](s1), %bb.3, [[C1]](s1), %bb.1 ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) - ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[COPY15]](s1), [[PHI3]](s32) - ; GFX10-NEXT: [[S_ANDN2_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY17]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_4]](s1), [[S_AND_B32_4]](s1), implicit-def $scc + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[PHI4]](s1), [[PHI]](s32) ; GFX10-NEXT: SI_LOOP [[INTRINSIC_CONVERGENT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, 
implicit $exec ; GFX10-NEXT: G_BR %bb.6 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.6: ; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI8:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.5 - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[S_OR_B32_4]](s1) - ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI8]](s32) - ; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY18]](s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_PHI [[PHI3]](s1), %bb.5 + ; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.5 + ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI6]](s32) + ; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[PHI5]](s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.2 bb.0: successors: %bb.1(0x80000000) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll index fb1253c7a17d9..c1f3924e466d5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.ll @@ -1,6 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 -; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -amdgpu-global-isel-risky-select -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s -; REQUIRES: do-not-run-me +; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -amdgpu-global-isel-risky-select -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s ; Simplest case, if-then, that requires lane mask merging, ; %phi lane mask will hold %val_A at %A.
Lanes that are active in %B diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.mir index 0e6588b4593c9..9461d558684e8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-structurizer.mir @@ -1,9 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 -# RUN: llc -global-isel -mtriple=amdgcn-mesa-amdpal -mcpu=gfx1010 -run-pass=amdgpu-global-isel-divergence-lowering -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX10 %s - -# Test is updated but copies between S1-register-with-reg-class and -# register-with-reg-class-no-LLT fail machine verification -# REQUIRES: do-not-run-me-with-machine-verifier +# RUN: llc -global-isel -mtriple=amdgcn-mesa-amdpal -mcpu=gfx1010 -run-pass=amdgpu-global-isel-divergence-lowering %s -o - | FileCheck -check-prefix=GFX10 %s --- name: divergent_i1_phi_if_then @@ -22,7 +18,6 @@ body: | ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY2]](s32), [[C]] - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP]](s1) ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(eq), [[COPY3]](s32), [[C1]] ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP1]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec @@ -33,18 +28,13 @@ body: | ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY2]](s32), [[C2]] - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY4]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY5]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.2: - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[ICMP]](s1), %bb.0, [[S_OR_B32_]](s1), %bb.1 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]] + ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s1) = G_PHI [[ICMP]](s1), %bb.0, [[ICMP2]](s1), %bb.1 ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY6]](s1), [[C4]], [[C3]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[PHI]](s1), [[C4]], [[C3]] ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) ; GFX10-NEXT: S_ENDPGM 0 bb.0: @@ -95,7 +85,6 @@ body: | ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s1) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[DEF]](s1) ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[COPY3]](s32), [[C]] ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec @@ -104,9 +93,7 @@ body: | ; GFX10-NEXT: bb.1: ; 
GFX10-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[DEF]](s1), %bb.0, %19(s1), %bb.3 - ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]] - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[COPY5]](s1) + ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s1) = G_PHI %10(s1), %bb.3, [[DEF]](s1), %bb.0 ; GFX10-NEXT: [[SI_ELSE:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_ELSE [[SI_IF]](s32), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.2 ; GFX10-NEXT: {{ $}} @@ -115,10 +102,6 @@ body: | ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(uge), [[COPY2]](s32), [[C1]] - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP1]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY7]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc ; GFX10-NEXT: G_BR %bb.4 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.3: @@ -126,19 +109,14 @@ body: | ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[COPY2]](s32), [[C2]] - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY4]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY8]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc ; GFX10-NEXT: G_BR %bb.1 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.4: - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32 = PHI [[COPY5]](s1), %bb.1, [[S_OR_B32_]](s1), %bb.2 - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]] + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s1) = G_PHI [[PHI]](s1), %bb.1, [[ICMP1]](s1), %bb.2 ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_ELSE]](s32) ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY9]](s1), [[C3]], [[C4]] + ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[PHI1]](s1), [[C3]], [[C4]] ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p1) :: (store (s32), addrspace 1) ; GFX10-NEXT: S_ENDPGM 0 bb.0: @@ -205,28 +183,20 @@ body: | ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[DEF1]](s1), %bb.0, %35, %bb.3 - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %9(s32), %bb.3, [[C]](s32), %bb.0 - ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %11(s32), %bb.3 - ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]] + ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %9(s32), %bb.3, [[C]](s32), %bb.0 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %11(s32), %bb.3 ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; 
GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1) - ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI2]](s32) + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI1]](s32) ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C2]](s32) ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64) ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1) ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C3]] - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY4]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY5]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.2 ; GFX10-NEXT: {{ $}} @@ -240,28 +210,23 @@ body: | ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD1]], [[C5]] ; GFX10-NEXT: G_STORE [[ADD]](s32), [[PTR_ADD1]](p1) :: (store (s32), addrspace 1) - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C5]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C5]] ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 100 - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI2]](s32), [[C6]] - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP1]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY7]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI1]](s32), [[C6]] ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.3: ; GFX10-NEXT: successors: %bb.4(0x04000000), %bb.1(0x7c000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32 = PHI [[S_OR_B32_]](s1), %bb.1, [[S_OR_B32_1]](s1), %bb.2 - ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.2, [[DEF]](s32), %bb.1 - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]] + ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.2, [[DEF]](s32), %bb.1 + ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[ICMP1]](s1), %bb.2, [[C1]](s1), %bb.1 ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) - ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[COPY8]](s1), [[PHI1]](s32) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[PHI3]](s1), [[PHI]](s32) ; GFX10-NEXT: SI_LOOP [[INTRINSIC_CONVERGENT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.4 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.4: - ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.3 - ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI5]](s32) + ; 
GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.3 + ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](s32) ; GFX10-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1(0x80000000) @@ -343,28 +308,20 @@ body: | ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[DEF1]](s1), %bb.0, %48, %bb.3 - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %12(s32), %bb.3, [[C]](s32), %bb.0 - ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %14(s32), %bb.3 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]] + ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %12(s32), %bb.3, [[C]](s32), %bb.0 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %14(s32), %bb.3 ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1) - ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI2]](s32) + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI1]](s32) ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C2]](s32) ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64) ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1) ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C3]] - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY7]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.2 ; GFX10-NEXT: {{ $}} @@ -372,7 +329,6 @@ body: | ; GFX10-NEXT: successors: %bb.4(0x40000000), %bb.5(0x40000000) ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C4]](s1) ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C5]](s32) ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV2]], [[SHL1]](s64) @@ -385,11 +341,10 @@ body: | ; GFX10-NEXT: bb.3: ; GFX10-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32 = PHI [[S_OR_B32_]](s1), %bb.1, %47(s1), %bb.5 - ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI %32(s32), %bb.5, [[DEF]](s32), %bb.1 - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]] + ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %32(s32), %bb.5, [[DEF]](s32), %bb.1 + ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI %34(s1), %bb.5, [[C1]](s1), %bb.1 ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) - ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = 
G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[COPY10]](s1), [[PHI1]](s32) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[PHI3]](s1), [[PHI]](s32) ; GFX10-NEXT: SI_LOOP [[INTRINSIC_CONVERGENT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.6 ; GFX10-NEXT: {{ $}} @@ -403,30 +358,21 @@ body: | ; GFX10-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD2]], [[C8]] ; GFX10-NEXT: G_STORE [[ADD]](s32), [[PTR_ADD2]](p1) :: (store (s32), addrspace 1) - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C8]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C8]] ; GFX10-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 100 - ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI2]](s32), [[C9]] - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY9]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc + ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI1]](s32), [[C9]] ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.5: ; GFX10-NEXT: successors: %bb.3(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32 = PHI [[C4]](s1), %bb.2, [[S_OR_B32_1]](s1), %bb.4 - ; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.4, [[DEF]](s32), %bb.2 - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]] - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[COPY12]](s1) + ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.4, [[DEF]](s32), %bb.2 + ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s1) = G_PHI [[ICMP2]](s1), %bb.4, [[C4]](s1), %bb.2 ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF1]](s32) - ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY13]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc ; GFX10-NEXT: G_BR %bb.3 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.6: - ; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.3 - ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](s32) + ; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.3 + ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI6]](s32) ; GFX10-NEXT: S_ENDPGM 0 bb.0: successors: %bb.1(0x80000000) @@ -535,28 +481,20 @@ body: | ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.3(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[DEF1]](s1), %bb.0, %61, %bb.3 - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %15(s32), %bb.3, [[C]](s32), %bb.0 - ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = 
G_PHI [[C]](s32), %bb.0, %17(s32), %bb.3 - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]] + ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %15(s32), %bb.3, [[C]](s32), %bb.0 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %17(s32), %bb.3 ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1) - ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI2]](s32) + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI1]](s32) ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C2]](s32) ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64) ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1) ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C3]] - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY9]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.2 ; GFX10-NEXT: {{ $}} @@ -564,7 +502,6 @@ body: | ; GFX10-NEXT: successors: %bb.4(0x40000000), %bb.5(0x40000000) ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[C4]](s1) ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C5]](s32) ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV2]], [[SHL1]](s64) @@ -577,11 +514,10 @@ body: | ; GFX10-NEXT: bb.3: ; GFX10-NEXT: successors: %bb.8(0x04000000), %bb.1(0x7c000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI3:%[0-9]+]]:sreg_32 = PHI [[S_OR_B32_]](s1), %bb.1, %60(s1), %bb.5 - ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI %35(s32), %bb.5, [[DEF]](s32), %bb.1 - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]] + ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %35(s32), %bb.5, [[DEF]](s32), %bb.1 + ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI %37(s1), %bb.5, [[C1]](s1), %bb.1 ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) - ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[COPY12]](s1), [[PHI1]](s32) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[PHI3]](s1), [[PHI]](s32) ; GFX10-NEXT: SI_LOOP [[INTRINSIC_CONVERGENT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.8 ; GFX10-NEXT: {{ $}} @@ -589,7 +525,6 @@ body: | ; GFX10-NEXT: successors: %bb.6(0x40000000), %bb.7(0x40000000) ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[C7]](s1) ; GFX10-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-NEXT: [[SHL2:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C8]](s32) ; GFX10-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV3]], [[SHL2]](s64) @@ -602,14 +537,9 @@ body: | ; 
GFX10-NEXT: bb.5: ; GFX10-NEXT: successors: %bb.3(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32 = PHI [[C4]](s1), %bb.2, %71(s1), %bb.7 - ; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI %46(s32), %bb.7, [[DEF]](s32), %bb.2 - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]] - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[COPY14]](s1) + ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI %46(s32), %bb.7, [[DEF]](s32), %bb.2 + ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s1) = G_PHI %47(s1), %bb.7, [[C4]](s1), %bb.2 ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF1]](s32) - ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY15]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc ; GFX10-NEXT: G_BR %bb.3 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.6: @@ -622,30 +552,21 @@ body: | ; GFX10-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD3]], [[C11]] ; GFX10-NEXT: G_STORE [[ADD]](s32), [[PTR_ADD3]](p1) :: (store (s32), addrspace 1) - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C11]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C11]] ; GFX10-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 100 - ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI2]](s32), [[C12]] - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP3]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY13]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY16]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc + ; GFX10-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI1]](s32), [[C12]] ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.7: ; GFX10-NEXT: successors: %bb.5(0x80000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI7:%[0-9]+]]:sreg_32 = PHI [[C7]](s1), %bb.4, [[S_OR_B32_2]](s1), %bb.6 - ; GFX10-NEXT: [[PHI8:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.6, [[DEF]](s32), %bb.4 - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32(s1) = COPY [[PHI7]] - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:sreg_32(s1) = COPY [[COPY17]](s1) + ; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.6, [[DEF]](s32), %bb.4 + ; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(s1) = G_PHI [[ICMP3]](s1), %bb.6, [[C7]](s1), %bb.4 ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF2]](s32) - ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY11]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY18]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc ; GFX10-NEXT: G_BR %bb.5 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.8: - ; GFX10-NEXT: [[PHI9:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.3 - ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI9]](s32) + ; GFX10-NEXT: [[PHI8:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.3 + ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI8]](s32) ; GFX10-NEXT: 
S_ENDPGM 0 bb.0: successors: %bb.1(0x80000000) @@ -775,39 +696,20 @@ body: | ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF - ; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF - ; GFX10-NEXT: [[DEF3:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: ; GFX10-NEXT: successors: %bb.3(0x40000000), %bb.5(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[DEF3]](s1), %bb.0, %67(s1), %bb.5 - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32 = PHI [[DEF2]](s1), %bb.0, %56, %bb.5 - ; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32 = PHI [[DEF1]](s1), %bb.0, %43, %bb.5 - ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI %12(s32), %bb.5, [[C]](s32), %bb.0 - ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %14(s32), %bb.5 - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI]] - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]] - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[PHI2]] + ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %12(s32), %bb.5, [[C]](s32), %bb.0 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %14(s32), %bb.5 ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1) - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1) - ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI4]](s32) + ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI1]](s32) ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C2]](s32) ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64) ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1) ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[ICMP:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C3]] - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY7]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY9]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc - ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_1]](s1) ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[ICMP]](s1), %bb.5, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.3 ; GFX10-NEXT: {{ $}} @@ -822,7 +724,6 @@ body: | ; GFX10-NEXT: successors: %bb.5(0x80000000) ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s1) = G_CONSTANT i1 false - ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[C5]](s1) ; GFX10-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; GFX10-NEXT: [[SHL1:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C6]](s32) ; GFX10-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV]], [[SHL1]](s64) @@ -830,16 +731,9 @@ 
body: | ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD1]], [[C7]] ; GFX10-NEXT: G_STORE [[ADD]](s32), [[PTR_ADD1]](p1) :: (store (s32), addrspace 1) - ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI4]], [[C7]] + ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C7]] ; GFX10-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 100 - ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI4]](s32), [[C8]] - ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP1]](s1) - ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY11]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY13]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY12]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY14]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc + ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI1]](s32), [[C8]] ; GFX10-NEXT: G_BR %bb.5 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.4: @@ -849,27 +743,21 @@ body: | ; GFX10-NEXT: bb.5: ; GFX10-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32 = PHI [[S_OR_B32_1]](s1), %bb.1, [[S_OR_B32_3]](s1), %bb.3 - ; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32 = PHI [[S_OR_B32_]](s1), %bb.1, [[S_OR_B32_2]](s1), %bb.3 - ; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.3, [[DEF]](s32), %bb.1 - ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[PHI5]] - ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[PHI6]] - ; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[COPY16]](s1) + ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.3, [[DEF]](s32), %bb.1 + ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[C5]](s1), %bb.3, [[C1]](s1), %bb.1 + ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s1) = G_PHI [[ICMP1]](s1), %bb.3, [[C1]](s1), %bb.1 ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[SI_IF]](s32) - ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[COPY15]](s1), [[PHI3]](s32) - ; GFX10-NEXT: [[S_ANDN2_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY6]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY17]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_4:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_4]](s1), [[S_AND_B32_4]](s1), implicit-def $scc + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[PHI4]](s1), [[PHI]](s32) ; GFX10-NEXT: SI_LOOP [[INTRINSIC_CONVERGENT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.6 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.6: ; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI8:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.5 - ; GFX10-NEXT: [[COPY18:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[S_OR_B32_4]](s1) - ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS 
intrinsic(@llvm.amdgcn.end.cf), [[PHI8]](s32) - ; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY18]](s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec + ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_PHI [[PHI3]](s1), %bb.5 + ; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.5 + ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI6]](s32) + ; GFX10-NEXT: [[SI_IF1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[PHI5]](s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.2 bb.0: successors: %bb.1(0x80000000) @@ -969,11 +857,7 @@ body: | ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s1) = G_IMPLICIT_DEF - ; GFX10-NEXT: [[COPY6:%[0-9]+]]:sreg_32(s1) = COPY [[DEF]](s1) ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY4]](s32), [[COPY1]] - ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF - ; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF - ; GFX10-NEXT: [[DEF3:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF ; GFX10-NEXT: G_BR %bb.7 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.1: @@ -986,27 +870,18 @@ body: | ; GFX10-NEXT: bb.2: ; GFX10-NEXT: successors: %bb.4(0x40000000), %bb.7(0x40000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec = PHI %67(s1), %bb.6, %70, %bb.7 - ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32 = PHI %49(s1), %bb.6, %48(s1), %bb.7 - ; GFX10-NEXT: [[PHI2:%[0-9]+]]:sreg_32 = PHI %35(s1), %bb.6, %34(s1), %bb.7 - ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI]] - ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]] - ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[PHI2]] - ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[COPY9]](s1) + ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s1) = G_PHI %12(s1), %bb.6, [[DEF]](s1), %bb.7 + ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s1) = G_PHI %12(s1), %bb.6, %14(s1), %bb.7 ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), %15(s32) - ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[COPY8]](s1), %17(s32) - ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY7]](s1), $exec_lo, implicit-def $scc - ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY10]](s1), implicit-def $scc - ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc - ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[S_OR_B32_]](s1) + ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[PHI1]](s1), %17(s32) ; GFX10-NEXT: SI_LOOP [[INTRINSIC_CONVERGENT]](s32), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec ; GFX10-NEXT: G_BR %bb.4 ; GFX10-NEXT: {{ $}} ; GFX10-NEXT: bb.3: ; GFX10-NEXT: successors: %bb.6(0x04000000), %bb.3(0x7c000000) ; GFX10-NEXT: {{ $}} - ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.1, %19(s32), %bb.3 - ; GFX10-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[ICMP1]](s1), [[PHI3]](s32) + ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.1, %19(s32), %bb.3 + ; GFX10-NEXT: 
[[INTRINSIC_CONVERGENT1:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[ICMP1]](s1), [[PHI2]](s32)
   ; GFX10-NEXT: SI_LOOP [[INTRINSIC_CONVERGENT1]](s32), %bb.3, implicit-def $exec, implicit-def $scc, implicit $exec
   ; GFX10-NEXT: G_BR %bb.6
   ; GFX10-NEXT: {{ $}}
@@ -1015,28 +890,18 @@ body: |
   ; GFX10-NEXT: {{ $}}
   ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[INTRINSIC_CONVERGENT]](s32)
   ; GFX10-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[COPY5]](s32), [[COPY]]
-  ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP2]](s1)
   ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
-  ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[C2]](s1)
   ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP]], [[C2]]
   ; GFX10-NEXT: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP2]], [[XOR]]
   ; GFX10-NEXT: [[INTRINSIC_CONVERGENT2:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[OR]](s1), %25(s32)
-  ; GFX10-NEXT: [[DEF4:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
-  ; GFX10-NEXT: [[DEF5:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
-  ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 %63(s1), $exec_lo, implicit-def $scc
-  ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY12]](s1), implicit-def $scc
-  ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
-  ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY11]](s1), $exec_lo, implicit-def $scc
-  ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY13]](s1), implicit-def $scc
-  ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc
   ; GFX10-NEXT: SI_LOOP [[INTRINSIC_CONVERGENT2]](s32), %bb.7, implicit-def $exec, implicit-def $scc, implicit $exec
   ; GFX10-NEXT: G_BR %bb.5
   ; GFX10-NEXT: {{ $}}
   ; GFX10-NEXT: bb.5:
+  ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[ICMP2]](s1), %bb.4
   ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT2]](s32), %bb.4
-  ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_1]](s1)
   ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](s32)
-  ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY14]](s1), [[COPY3]], [[COPY2]]
+  ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[PHI3]](s1), [[COPY3]], [[COPY2]]
   ; GFX10-NEXT: [[INTRINSIC_CONVERGENT3:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[SELECT]](s32)
   ; GFX10-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT3]](s32)
   ; GFX10-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
@@ -1046,42 +911,17 @@ body: |
   ; GFX10-NEXT: {{ $}}
   ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT1]](s32), %bb.3
   ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
-  ; GFX10-NEXT: [[COPY15:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1)
-  ; GFX10-NEXT: [[COPY16:%[0-9]+]]:sreg_32(s1) = COPY [[C3]](s1)
   ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI5]](s32)
-  ; GFX10-NEXT: [[S_ANDN2_B32_3:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 %42(s1), $exec_lo, implicit-def $scc
-  ; GFX10-NEXT: [[S_AND_B32_3:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY16]](s1), implicit-def $scc
-  ; GFX10-NEXT: [[S_OR_B32_3:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_3]](s1), [[S_AND_B32_3]](s1), implicit-def $scc
-  ; GFX10-NEXT: [[S_ANDN2_B32_4:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 %56(s1), $exec_lo, implicit-def $scc
-  ; GFX10-NEXT: [[S_AND_B32_4:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY15]](s1), implicit-def $scc
-  ; GFX10-NEXT: [[S_OR_B32_4:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_4]](s1), [[S_AND_B32_4]](s1), implicit-def $scc
-  ; GFX10-NEXT: [[DEF6:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF
   ; GFX10-NEXT: G_BR %bb.2
   ; GFX10-NEXT: {{ $}}
   ; GFX10-NEXT: bb.7:
   ; GFX10-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
   ; GFX10-NEXT: {{ $}}
-  ; GFX10-NEXT: [[PHI6:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[ICMP]](s1), %bb.0, [[S_OR_B32_]](s1), %bb.2, [[S_OR_B32_2]](s1), %bb.4
-  ; GFX10-NEXT: [[PHI7:%[0-9]+]]:sreg_32 = PHI [[DEF3]](s1), %bb.0, [[PHI7]], %bb.2, [[S_OR_B32_1]](s1), %bb.4
-  ; GFX10-NEXT: [[PHI8:%[0-9]+]]:sreg_32 = PHI [[DEF2]](s1), %bb.0, [[PHI1]], %bb.2, [[DEF5]](s1), %bb.4
-  ; GFX10-NEXT: [[PHI9:%[0-9]+]]:sreg_32 = PHI [[DEF1]](s1), %bb.0, [[PHI2]], %bb.2, [[DEF4]](s1), %bb.4
-  ; GFX10-NEXT: [[PHI10:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT2]](s32), %bb.4, [[PHI10]](s32), %bb.2, [[C]](s32), %bb.0
-  ; GFX10-NEXT: [[PHI11:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.4, [[INTRINSIC_CONVERGENT]](s32), %bb.2, [[C]](s32), %bb.0
-  ; GFX10-NEXT: [[COPY17:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI6]]
-  ; GFX10-NEXT: [[COPY18:%[0-9]+]]:sreg_32(s1) = COPY [[PHI7]]
-  ; GFX10-NEXT: [[COPY19:%[0-9]+]]:sreg_32(s1) = COPY [[PHI8]]
-  ; GFX10-NEXT: [[COPY20:%[0-9]+]]:sreg_32(s1) = COPY [[PHI9]]
+  ; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT2]](s32), %bb.4, [[PHI6]](s32), %bb.2, [[C]](s32), %bb.0
+  ; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.4, [[INTRINSIC_CONVERGENT]](s32), %bb.2, [[C]](s32), %bb.0
+  ; GFX10-NEXT: [[PHI8:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_PHI [[ICMP]](s1), %bb.0, [[PHI]](s1), %bb.2, [[C2]](s1), %bb.4
   ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
-  ; GFX10-NEXT: [[COPY21:%[0-9]+]]:sreg_32(s1) = COPY [[C4]](s1)
-  ; GFX10-NEXT: [[S_ANDN2_B32_5:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY20]](s1), $exec_lo, implicit-def $scc
-  ; GFX10-NEXT: [[S_AND_B32_5:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY6]](s1), implicit-def $scc
-  ; GFX10-NEXT: [[S_OR_B32_5:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_5]](s1), [[S_AND_B32_5]](s1), implicit-def $scc
-  ; GFX10-NEXT: [[COPY22:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_5]](s1)
-  ; GFX10-NEXT: [[S_ANDN2_B32_6:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY19]](s1), $exec_lo, implicit-def $scc
-  ; GFX10-NEXT: [[S_AND_B32_6:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY21]](s1), implicit-def $scc
-  ; GFX10-NEXT: [[S_OR_B32_6:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_6]](s1), [[S_AND_B32_6]](s1), implicit-def $scc
-  ; GFX10-NEXT: [[COPY23:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_6]](s1)
-  ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY17]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[PHI8]](s1), %bb.2, implicit-def $exec, implicit-def $scc, implicit $exec
   ; GFX10-NEXT: G_BR %bb.1
   bb.0:
     successors: %bb.7(0x80000000)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.ll
index 431da1de1fd48..54881f59096c1 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.ll
@@ -1,6 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc -global-isel -amdgpu-global-isel-risky-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
-; REQUIRES: do-not-run-me
+; RUN: llc -global-isel -amdgpu-global-isel-risky-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
 
 define void @temporal_divergent_i1_phi(float %val, ptr %addr) {
 ; GFX10-LABEL: temporal_divergent_i1_phi:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.mir
index 64d740287d9fc..9c2d083d0aa1d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-i1.mir
@@ -1,9 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
-# RUN: llc -global-isel -mtriple=amdgcn-mesa-amdpal -mcpu=gfx1010 -run-pass=amdgpu-global-isel-divergence-lowering -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX10 %s
-
-# Test is updated but copies between S1-register-with-reg-class and
-# register-with-reg-class-no-LLT fail machine verification
-# REQUIRES: do-not-run-me-with-machine-verifier
+# RUN: llc -global-isel -mtriple=amdgcn-mesa-amdpal -mcpu=gfx1010 -run-pass=amdgpu-global-isel-divergence-lowering %s -o - | FileCheck -check-prefix=GFX10 %s
 
 ---
 name: temporal_divergent_i1_phi
@@ -21,37 +17,30 @@ body: |
   ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
   ; GFX10-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
   ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
   ; GFX10-NEXT: {{ $}}
   ; GFX10-NEXT: bb.1:
   ; GFX10-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
   ; GFX10-NEXT: {{ $}}
-  ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[DEF]](s1), %bb.0, %22(s1), %bb.1
-  ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C1]](s32), %bb.0
-  ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %9(s32), %bb.1
-  ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[C]](s1), %bb.0, %11(s1), %bb.1
-  ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32(s1) = COPY [[PHI3]](s1)
-  ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]]
+  ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C1]](s32), %bb.0
+  ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %9(s32), %bb.1
+  ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s1) = G_PHI [[C]](s1), %bb.0, %11(s1), %bb.1
   ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
-  ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI3]], [[C2]]
-  ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI2]](s32)
+  ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI2]], [[C2]]
+  ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI1]](s32)
   ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]]
   ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-  ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C3]]
-  ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI1]](s32)
-  ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY4]](s1), $exec_lo, implicit-def $scc
-  ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY3]](s1), implicit-def $scc
-  ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
+  ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C3]]
+  ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI]](s32)
   ; GFX10-NEXT: SI_LOOP [[INTRINSIC_CONVERGENT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
   ; GFX10-NEXT: G_BR %bb.2
   ; GFX10-NEXT: {{ $}}
   ; GFX10-NEXT: bb.2:
+  ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[PHI2]](s1), %bb.1
   ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.1
-  ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
   ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](s32)
   ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
   ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
-  ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY5]](s1), [[C5]], [[C4]]
+  ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[PHI3]](s1), [[C5]], [[C4]]
   ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32))
   ; GFX10-NEXT: SI_RETURN
   bb.0:
@@ -108,37 +97,30 @@ body: |
   ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
   ; GFX10-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
   ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; GFX10-NEXT: [[DEF:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
   ; GFX10-NEXT: {{ $}}
   ; GFX10-NEXT: bb.1:
   ; GFX10-NEXT: successors: %bb.2(0x04000000), %bb.1(0x7c000000)
   ; GFX10-NEXT: {{ $}}
-  ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32 = PHI [[DEF]](s1), %bb.0, %22(s1), %bb.1
-  ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C1]](s32), %bb.0
-  ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %9(s32), %bb.1
-  ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[C]](s1), %bb.0, %11(s1), %bb.1
-  ; GFX10-NEXT: [[COPY3:%[0-9]+]]:sreg_32(s1) = COPY [[PHI]]
+  ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %7(s32), %bb.1, [[C1]](s32), %bb.0
+  ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C1]](s32), %bb.0, %9(s32), %bb.1
+  ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s1) = G_PHI [[C]](s1), %bb.0, %11(s1), %bb.1
   ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
-  ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI3]], [[C2]]
-  ; GFX10-NEXT: [[COPY4:%[0-9]+]]:sreg_32(s1) = COPY [[XOR]](s1)
-  ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI2]](s32)
+  ; GFX10-NEXT: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[PHI2]], [[C2]]
+  ; GFX10-NEXT: [[UITOFP:%[0-9]+]]:_(s32) = G_UITOFP [[PHI1]](s32)
   ; GFX10-NEXT: [[FCMP:%[0-9]+]]:_(s1) = G_FCMP floatpred(ogt), [[UITOFP]](s32), [[COPY]]
   ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
-  ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI2]], [[C3]]
-  ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI1]](s32)
-  ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY3]](s1), $exec_lo, implicit-def $scc
-  ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY4]](s1), implicit-def $scc
-  ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
+  ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C3]]
+  ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[FCMP]](s1), [[PHI]](s32)
   ; GFX10-NEXT: SI_LOOP [[INTRINSIC_CONVERGENT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
   ; GFX10-NEXT: G_BR %bb.2
   ; GFX10-NEXT: {{ $}}
   ; GFX10-NEXT: bb.2:
+  ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[XOR]](s1), %bb.1
   ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.1
-  ; GFX10-NEXT: [[COPY5:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
   ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI4]](s32)
   ; GFX10-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 0.000000e+00
   ; GFX10-NEXT: [[C5:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
-  ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[COPY5]](s1), [[C5]], [[C4]]
+  ; GFX10-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[PHI3]](s1), [[C5]], [[C4]]
   ; GFX10-NEXT: G_STORE [[SELECT]](s32), [[MV]](p0) :: (store (s32))
   ; GFX10-NEXT: SI_RETURN
   bb.0:
@@ -201,31 +183,20 @@ body: |
   ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY5]](s32), [[COPY6]](s32)
   ; GFX10-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
   ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
-  ; GFX10-NEXT: [[DEF1:%[0-9]+]]:sreg_32(s1) = IMPLICIT_DEF
-  ; GFX10-NEXT: [[DEF2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = IMPLICIT_DEF
   ; GFX10-NEXT: {{ $}}
   ; GFX10-NEXT: bb.1:
   ; GFX10-NEXT: successors: %bb.3(0x50000000), %bb.5(0x30000000)
   ; GFX10-NEXT: {{ $}}
-  ; GFX10-NEXT: [[PHI:%[0-9]+]]:sreg_32_xm0_xexec = PHI [[DEF2]](s1), %bb.0, %53(s1), %bb.5
-  ; GFX10-NEXT: [[PHI1:%[0-9]+]]:sreg_32 = PHI [[DEF1]](s1), %bb.0, %42, %bb.5
-  ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI %13(s32), %bb.5, [[C]](s32), %bb.0
-  ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %15(s32), %bb.5
-  ; GFX10-NEXT: [[COPY7:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI]]
-  ; GFX10-NEXT: [[COPY8:%[0-9]+]]:sreg_32(s1) = COPY [[PHI1]]
+  ; GFX10-NEXT: [[PHI:%[0-9]+]]:_(s32) = G_PHI %13(s32), %bb.5, [[C]](s32), %bb.0
+  ; GFX10-NEXT: [[PHI1:%[0-9]+]]:_(s32) = G_PHI [[C]](s32), %bb.0, %15(s32), %bb.5
   ; GFX10-NEXT: [[C1:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
-  ; GFX10-NEXT: [[COPY9:%[0-9]+]]:sreg_32(s1) = COPY [[C1]](s1)
-  ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI3]](s32)
+  ; GFX10-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[PHI1]](s32)
   ; GFX10-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
   ; GFX10-NEXT: [[SHL:%[0-9]+]]:_(s64) = G_SHL [[SEXT]], [[C2]](s32)
   ; GFX10-NEXT: [[PTR_ADD:%[0-9]+]]:_(p1) = G_PTR_ADD [[MV1]], [[SHL]](s64)
   ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p1) :: (load (s32), addrspace 1)
   ; GFX10-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
   ; GFX10-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[LOAD]](s32), [[C3]]
-  ; GFX10-NEXT: [[S_ANDN2_B32_:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY8]](s1), $exec_lo, implicit-def $scc
-  ; GFX10-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY9]](s1), implicit-def $scc
-  ; GFX10-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_]](s1), [[S_AND_B32_]](s1), implicit-def $scc
-  ; GFX10-NEXT: [[COPY10:%[0-9]+]]:sreg_32(s1) = COPY [[S_OR_B32_]](s1)
   ; GFX10-NEXT: G_BRCOND [[ICMP]](s1), %bb.3
   ; GFX10-NEXT: G_BR %bb.5
   ; GFX10-NEXT: {{ $}}
@@ -247,12 +218,8 @@ body: |
   ; GFX10-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
   ; GFX10-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[LOAD1]], [[C7]]
   ; GFX10-NEXT: G_STORE [[ADD]](s32), [[PTR_ADD1]](p1) :: (store (s32), addrspace 1)
-  ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI3]], [[C7]]
-  ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI3]](s32), [[COPY2]]
-  ; GFX10-NEXT: [[COPY11:%[0-9]+]]:sreg_32(s1) = COPY [[ICMP1]](s1)
-  ; GFX10-NEXT: [[S_ANDN2_B32_1:%[0-9]+]]:sreg_32(s1) = S_ANDN2_B32 [[COPY10]](s1), $exec_lo, implicit-def $scc
-  ; GFX10-NEXT: [[S_AND_B32_1:%[0-9]+]]:sreg_32(s1) = S_AND_B32 $exec_lo, [[COPY11]](s1), implicit-def $scc
-  ; GFX10-NEXT: [[S_OR_B32_1:%[0-9]+]]:sreg_32(s1) = S_OR_B32 [[S_ANDN2_B32_1]](s1), [[S_AND_B32_1]](s1), implicit-def $scc
+  ; GFX10-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[PHI1]], [[C7]]
+  ; GFX10-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ult), [[PHI1]](s32), [[COPY2]]
   ; GFX10-NEXT: G_BR %bb.5
   ; GFX10-NEXT: {{ $}}
   ; GFX10-NEXT: bb.4:
@@ -262,25 +229,20 @@ body: |
   ; GFX10-NEXT: bb.5:
   ; GFX10-NEXT: successors: %bb.6(0x04000000), %bb.1(0x7c000000)
   ; GFX10-NEXT: {{ $}}
-  ; GFX10-NEXT: [[PHI4:%[0-9]+]]:sreg_32 = PHI [[S_OR_B32_]](s1), %bb.1, [[S_OR_B32_1]](s1), %bb.3
-  ; GFX10-NEXT: [[PHI5:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.3, [[DEF]](s32), %bb.1
-  ; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s1) = G_PHI [[C5]](s1), %bb.3, [[C1]](s1), %bb.1
-  ; GFX10-NEXT: [[COPY12:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[PHI6]](s1)
-  ; GFX10-NEXT: [[COPY13:%[0-9]+]]:sreg_32(s1) = COPY [[PHI4]]
-  ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[COPY13]](s1), [[PHI2]](s32)
-  ; GFX10-NEXT: [[S_ANDN2_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_ANDN2_B32 [[COPY7]](s1), $exec_lo, implicit-def $scc
-  ; GFX10-NEXT: [[S_AND_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_AND_B32 $exec_lo, [[COPY12]](s1), implicit-def $scc
-  ; GFX10-NEXT: [[S_OR_B32_2:%[0-9]+]]:sreg_32_xm0_xexec(s1) = S_OR_B32 [[S_ANDN2_B32_2]](s1), [[S_AND_B32_2]](s1), implicit-def $scc
+  ; GFX10-NEXT: [[PHI2:%[0-9]+]]:_(s32) = G_PHI [[ADD1]](s32), %bb.3, [[DEF]](s32), %bb.1
+  ; GFX10-NEXT: [[PHI3:%[0-9]+]]:_(s1) = G_PHI [[C5]](s1), %bb.3, [[C1]](s1), %bb.1
+  ; GFX10-NEXT: [[PHI4:%[0-9]+]]:_(s1) = G_PHI [[ICMP1]](s1), %bb.3, [[C1]](s1), %bb.1
+  ; GFX10-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:sreg_32_xm0_xexec(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.if.break), [[PHI4]](s1), [[PHI]](s32)
   ; GFX10-NEXT: SI_LOOP [[INTRINSIC_CONVERGENT]](s32), %bb.1, implicit-def $exec, implicit-def $scc, implicit $exec
   ; GFX10-NEXT: G_BR %bb.6
   ; GFX10-NEXT: {{ $}}
   ; GFX10-NEXT: bb.6:
   ; GFX10-NEXT: successors: %bb.2(0x40000000), %bb.4(0x40000000)
   ; GFX10-NEXT: {{ $}}
-  ; GFX10-NEXT: [[PHI7:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.5
-  ; GFX10-NEXT: [[COPY14:%[0-9]+]]:sreg_32_xm0_xexec(s1) = COPY [[S_OR_B32_2]](s1)
-  ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI7]](s32)
-  ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[COPY14]](s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; GFX10-NEXT: [[PHI5:%[0-9]+]]:sreg_32_xm0_xexec(s1) = G_PHI [[PHI3]](s1), %bb.5
+  ; GFX10-NEXT: [[PHI6:%[0-9]+]]:_(s32) = G_PHI [[INTRINSIC_CONVERGENT]](s32), %bb.5
+  ; GFX10-NEXT: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.end.cf), [[PHI6]](s32)
+  ; GFX10-NEXT: [[SI_IF:%[0-9]+]]:sreg_32_xm0_xexec(s32) = SI_IF [[PHI5]](s1), %bb.4, implicit-def $exec, implicit-def $scc, implicit $exec
   ; GFX10-NEXT: G_BR %bb.2
   bb.0:
     successors: %bb.1(0x80000000)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-reg.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-reg.ll
index b21e6a729dbc2..d065f228a4e4a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-reg.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-reg.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
-; RUN: llc -global-isel -amdgpu-global-isel-risky-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
+; RUN: llc -global-isel -amdgpu-global-isel-risky-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
 
 define void @temporal_divergent_i32(float %val, ptr %addr) {
 ; GFX10-LABEL: temporal_divergent_i32:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-reg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-reg.mir
index cd6a0f8297a5a..4cc68029489e9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-reg.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-temporal-divergent-reg.mir
@@ -1,5 +1,5 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
-# RUN: llc -global-isel -mtriple=amdgcn-mesa-amdpal -mcpu=gfx1010 -run-pass=amdgpu-global-isel-divergence-lowering -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX10 %s
+# RUN: llc -global-isel -mtriple=amdgcn-mesa-amdpal -mcpu=gfx1010 -run-pass=amdgpu-global-isel-divergence-lowering %s -o - | FileCheck -check-prefix=GFX10 %s
 
 ---
 name: temporal_divergent_i32
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
index 6384c47398fce..25e2267fdee89 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll
@@ -1,6 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -global-isel -amdgpu-global-isel-risky-select -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck %s
-; REQUIRES: do-not-run-me
 
 ; Make sure the branch targets are correct after lowering llvm.amdgcn.if
 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-phi.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-phi.mir
index c7d45f062d0d2..c7b7a84e821f9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-phi.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-phi.mir
@@ -422,7 +422,7 @@ body: |
     G_BR %bb.2
 
   bb.2:
-    %6:sgpr(s32) = G_PHI %0(s32), %bb.0, %5(s32), %bb.1
+    %6:sgpr(s32) = PHI %0(s32), %bb.0, %5(s32), %bb.1
     $sgpr0 = COPY %6(s32)
     S_SETPC_B64 undef $sgpr30_sgpr31
 
@@ -476,7 +476,7 @@ body: |
     G_BR %bb.2
 
   bb.2:
-    %6:vgpr(s32) = G_PHI %0(s32), %bb.0, %5(s32), %bb.1
+    %6:vgpr(s32) = PHI %0(s32), %bb.0, %5(s32), %bb.1
     $vgpr0 = COPY %6
     S_SETPC_B64 undef $sgpr30_sgpr31
 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll
index 4caf83774bbba..a5482bd5b79a9 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll
@@ -5,7 +5,6 @@
 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -mattr=+wavefrontsize64 < %s | FileCheck --check-prefix=GFX10_W64 %s
 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefix=GFX11_W32 %s
 ; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+wavefrontsize64 < %s | FileCheck --check-prefix=GFX11_W64 %s
-; REQUIRES: do-not-run-me
 
 define float @v_div_fmas_f32(float %a, float %b, float %c, i1 %d) {
 ; GFX7-LABEL: v_div_fmas_f32: