diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index caf21086cc377b..1e90e6ba541838 100644
--- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -51,6 +51,7 @@
 #include "AMDGPUSubtarget.h"
 #include "SIInstrInfo.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
@@ -86,7 +87,7 @@ class SILowerControlFlow : public MachineFunctionPass {
   const SIInstrInfo *TII = nullptr;
   LiveIntervals *LIS = nullptr;
   MachineRegisterInfo *MRI = nullptr;
-  DenseSet<const MachineInstr*> LoweredEndCf;
+  SetVector<MachineInstr*> LoweredEndCf;
   DenseSet<Register> LoweredIf;
 
   const TargetRegisterClass *BoolRC = nullptr;
@@ -117,6 +118,9 @@
   skipIgnoreExecInstsTrivialSucc(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator It) const;
 
+  // Remove redundant SI_END_CF instructions.
+  void optimizeEndCf();
+
 public:
   static char ID;
 
@@ -448,29 +452,6 @@ void SILowerControlFlow::emitEndCf(MachineInstr &MI) {
   MachineInstr *Def = MRI.getUniqueVRegDef(CFMask);
   const DebugLoc &DL = MI.getDebugLoc();
 
-  // If the only instruction immediately following this END_CF is an another
-  // END_CF in the only successor we can avoid emitting exec mask restore here.
-  if (RemoveRedundantEndcf) {
-    auto Next =
-        skipIgnoreExecInstsTrivialSucc(MBB, std::next(MI.getIterator()));
-    if (Next != MBB.end() && (Next->getOpcode() == AMDGPU::SI_END_CF ||
-                              LoweredEndCf.count(&*Next))) {
-      // Only skip inner END_CF if outer ENDCF belongs to SI_IF.
-      // If that belongs to SI_ELSE then saved mask has an inverted value.
-      Register SavedExec = Next->getOperand(0).getReg();
-      const MachineInstr *Def = MRI.getUniqueVRegDef(SavedExec);
-      // A lowered SI_IF turns definition into COPY of exec.
-      if (Def && (Def->getOpcode() == AMDGPU::SI_IF ||
-                  LoweredIf.count(SavedExec))) {
-        LLVM_DEBUG(dbgs() << "Skip redundant "; MI.dump());
-        if (LIS)
-          LIS->RemoveMachineInstrFromMaps(MI);
-        MI.eraseFromParent();
-        return;
-      }
-    }
-  }
-
   MachineBasicBlock::iterator InsPt = Def && Def->getParent() == &MBB ?
       std::next(MachineBasicBlock::iterator(Def)) : MBB.begin();
 
@@ -544,6 +525,34 @@ void SILowerControlFlow::combineMasks(MachineInstr &MI) {
   MRI->getUniqueVRegDef(Reg)->eraseFromParent();
 }
 
+void SILowerControlFlow::optimizeEndCf() {
+  // If the only instruction immediately following this END_CF is another
+  // END_CF in the only successor we can avoid emitting exec mask restore here.
+  if (!RemoveRedundantEndcf)
+    return;
+
+  for (MachineInstr *MI : LoweredEndCf) {
+    MachineBasicBlock &MBB = *MI->getParent();
+    auto Next =
+      skipIgnoreExecInstsTrivialSucc(MBB, std::next(MI->getIterator()));
+    if (Next == MBB.end() || !LoweredEndCf.count(&*Next))
+      continue;
+    // Only skip the inner END_CF if the outer END_CF belongs to SI_IF.
+    // If it belongs to SI_ELSE then the saved mask has an inverted value.
+    Register SavedExec
+      = TII->getNamedOperand(*Next, AMDGPU::OpName::src1)->getReg();
+    assert(SavedExec.isVirtual() && "Expected saved exec to be src1!");
+
+    const MachineInstr *Def = MRI->getUniqueVRegDef(SavedExec);
+    if (Def && LoweredIf.count(SavedExec)) {
+      LLVM_DEBUG(dbgs() << "Skip redundant "; MI->dump());
+      if (LIS)
+        LIS->RemoveMachineInstrFromMaps(*MI);
+      MI->eraseFromParent();
+    }
+  }
+}
+
 bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   TII = ST.getInstrInfo();
@@ -626,6 +635,8 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
     }
   }
 
+  optimizeEndCf();
+
   LoweredEndCf.clear();
   LoweredIf.clear();
 
diff --git a/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir b/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir
index 8bb1d134154d14..815251e3560cec 100644
--- a/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir
+++ b/llvm/test/CodeGen/AMDGPU/collapse-endcf.mir
@@ -533,3 +533,63 @@ body: |
     S_ENDPGM 0
 
 ...
+
+---
+name: if_inside_loop
+tracksRegLiveness: true
+machineFunctionInfo:
+  isEntryFunction: true
+body: |
+  ; GCN-LABEL: name: if_inside_loop
+  ; GCN: bb.0:
+  ; GCN:   successors: %bb.6(0x80000000)
+  ; GCN:   S_BRANCH %bb.6
+  ; GCN: bb.1:
+  ; GCN:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
+  ; GCN:   [[COPY:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
+  ; GCN:   [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY]], undef %1:sreg_64, implicit-def dead $scc
+  ; GCN:   $exec = S_MOV_B64_term killed [[S_AND_B64_]]
+  ; GCN:   S_CBRANCH_EXECZ %bb.3, implicit $exec
+  ; GCN: bb.2:
+  ; GCN:   successors: %bb.6(0x80000000)
+  ; GCN:   S_BRANCH %bb.6
+  ; GCN: bb.3:
+  ; GCN:   successors: %bb.4(0x80000000)
+  ; GCN:   $exec = S_OR_B64 $exec, [[COPY]], implicit-def $scc
+  ; GCN: bb.4:
+  ; GCN:   successors: %bb.5(0x80000000)
+  ; GCN:   $exec = S_OR_B64 $exec, %2, implicit-def $scc
+  ; GCN: bb.5:
+  ; GCN:   successors: %bb.6(0x80000000)
+  ; GCN: bb.6:
+  ; GCN:   successors: %bb.4(0x40000000), %bb.0(0x40000000)
+  ; GCN:   [[COPY1:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
+  ; GCN:   [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY1]], undef %3:sreg_64, implicit-def dead $scc
+  ; GCN:   [[S_XOR_B64_:%[0-9]+]]:sreg_64 = S_XOR_B64 [[S_AND_B64_1]], [[COPY1]], implicit-def dead $scc
+  ; GCN:   $exec = S_MOV_B64_term killed [[S_AND_B64_1]]
+  ; GCN:   S_CBRANCH_EXECZ %bb.4, implicit $exec
+  ; GCN:   S_BRANCH %bb.0
+  ; GCN:   S_ENDPGM 0
+  bb.0:
+    S_BRANCH %bb.6
+
+  bb.1:
+    %0:sreg_64 = SI_IF undef %1:sreg_64, %bb.3, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+
+  bb.2:
+    S_BRANCH %bb.6
+
+  bb.3:
+    SI_END_CF %0:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+
+  bb.4:
+    SI_END_CF %2:sreg_64, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+
+  bb.5:
+
+  bb.6:
+    %2:sreg_64 = SI_IF undef %3:sreg_64, %bb.4, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
+    S_BRANCH %bb.0
+    S_ENDPGM 0
+
+...