Skip to content

Commit

Permalink
[AMDGPU] Eliminate no effect instructions before s_endpgm
Browse files Browse the repository at this point in the history
Differential Revision: https://reviews.llvm.org/D36585

llvm-svn: 310987
  • Loading branch information
rampitec committed Aug 16, 2017
1 parent 0c6374e commit a9487d9
Show file tree
Hide file tree
Showing 15 changed files with 406 additions and 32 deletions.
66 changes: 63 additions & 3 deletions llvm/lib/Target/AMDGPU/SIOptimizeExecMaskingPreRA.cpp
Expand Up @@ -111,9 +111,62 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
const SIInstrInfo *TII = ST.getInstrInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
LiveIntervals *LIS = &getAnalysis<LiveIntervals>();
DenseSet<unsigned> RecalcRegs({AMDGPU::EXEC_LO, AMDGPU::EXEC_HI});
bool Changed = false;

for (MachineBasicBlock &MBB : MF) {

// Try to remove unneeded instructions before s_endpgm.
// Starting from a block that has no successors and ends in S_ENDPGM, walk the
// instructions bottom-up and erase each one until an instruction with an
// observable effect (store, barrier, call, unmodeled side effect or ordered
// memory reference) is reached. If the walk reaches the top of a block without
// meeting such an instruction, it continues in every predecessor whose only
// successor is that block.
if (MBB.succ_empty()) {
  if (MBB.empty() || MBB.back().getOpcode() != AMDGPU::S_ENDPGM)
    continue;

  // Worklist of blocks still to be scanned from the bottom.
  SmallVector<MachineBasicBlock*, 4> Blocks({&MBB});

  while (!Blocks.empty()) {
    auto CurBB = Blocks.pop_back_val();
    auto I = CurBB->rbegin(), E = CurBB->rend();
    if (I != E) {
      // Keep the trailing unconditional branch / S_ENDPGM and start the scan
      // above it. A block ending in any other kind of branch is left alone.
      if (I->isUnconditionalBranch() || I->getOpcode() == AMDGPU::S_ENDPGM)
        ++I;
      else if (I->isBranch())
        continue;
    }

    while (I != E) {
      // Debug values are kept, but the iterator must step over them;
      // a bare `continue` here would re-test the same instruction forever.
      if (I->isDebugValue()) {
        ++I;
        continue;
      }
      // Stop at the first instruction whose effect is visible outside the
      // wave; it and everything above it must stay.
      if (I->mayStore() || I->isBarrier() || I->isCall() ||
          I->hasUnmodeledSideEffects() || I->hasOrderedMemoryRef())
        break;

      DEBUG(dbgs() << "Removing no effect instruction: " << *I << '\n');

      // Remember every register the instruction touched so that its
      // liveness can be recomputed once the pass has finished.
      for (auto &Op : I->operands())
        if (Op.isReg())
          RecalcRegs.insert(Op.getReg());

      // Grab the next position before erasing the current instruction.
      auto Next = std::next(I);
      LIS->RemoveMachineInstrFromMaps(*I);
      I->eraseFromParent();
      I = Next;

      Changed = true;
    }

    // The scan stopped at an instruction that must be kept: do not ascend.
    if (I != E)
      continue;

    // Try to ascend predecessors.
    for (auto *Pred : CurBB->predecessors()) {
      if (Pred->succ_size() == 1)
        Blocks.push_back(Pred);
    }
  }
  continue;
}

// Try to collapse adjacent endifs.
auto Lead = MBB.begin(), E = MBB.end();
if (MBB.succ_size() != 1 || Lead == E || !isEndCF(*Lead, TRI))
continue;
Expand Down Expand Up @@ -174,9 +227,16 @@ bool SIOptimizeExecMaskingPreRA::runOnMachineFunction(MachineFunction &MF) {
}

if (Changed) {
  // Recompute liveness for every register recorded in RecalcRegs. The set is
  // seeded with EXEC_LO and EXEC_HI when it is constructed, so both reg units
  // of exec are covered by this loop and need no separate removeRegUnit calls.
  for (auto Reg : RecalcRegs) {
    if (TargetRegisterInfo::isVirtualRegister(Reg)) {
      // Drop the old interval and rebuild it only if the register is still
      // referenced somewhere in the function.
      LIS->removeInterval(Reg);
      if (!MRI.reg_empty(Reg))
        LIS->createAndComputeVirtRegInterval(Reg);
    } else {
      // Physical register: discard the live range of each of its reg units.
      for (MCRegUnitIterator U(Reg, TRI); U.isValid(); ++U)
        LIS->removeRegUnit(*U);
    }
  }
}

return Changed;
Expand Down
1 change: 0 additions & 1 deletion llvm/test/CodeGen/AMDGPU/branch-condition-and.ll
Expand Up @@ -20,7 +20,6 @@
; GCN: ds_write_b32

; GCN: [[BB5]]
; GCN: s_or_b64 exec, exec
; GCN-NEXT: s_endpgm
; GCN-NEXT: .Lfunc_end
define amdgpu_ps void @ham(float %arg, float %arg1) #0 {
Expand Down
35 changes: 28 additions & 7 deletions llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
Expand Up @@ -9,7 +9,6 @@
; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN: store_dword
; GCN-NEXT: {{^}}[[ENDIF]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @simple_nested_if(i32 addrspace(1)* nocapture %arg) {
bb:
Expand Down Expand Up @@ -45,7 +44,6 @@ bb.outer.end: ; preds = %bb.outer.then, %bb.
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_INNER]]
; GCN: store_dword
; GCN-NEXT: {{^}}[[ENDIF_OUTER]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_OUTER]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @uncollapsable_nested_if(i32 addrspace(1)* nocapture %arg) {
bb:
Expand Down Expand Up @@ -90,7 +88,6 @@ bb.outer.end: ; preds = %bb.inner.then, %bb
; GCN-NEXT: ; mask branch [[ENDIF_OUTER]]
; GCN: store_dword
; GCN-NEXT: {{^}}[[ENDIF_OUTER]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_OUTER]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @nested_if_if_else(i32 addrspace(1)* nocapture %arg) {
bb:
Expand Down Expand Up @@ -141,13 +138,10 @@ bb.outer.end: ; preds = %bb, %bb.then, %b
; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN: store_dword
; GCN-NEXT: s_and_saveexec_b64 [[SAVEEXEC_INNER_IF_OUTER_THEN:s\[[0-9:]+\]]]
; GCN-NEXT: ; mask branch [[ENDIF_INNER_OUTER_THEN:BB[0-9_]+]]
; GCN-NEXT: ; mask branch [[ENDIF_OUTER]]
; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN: store_dword
; GCN-NEXT: {{^}}[[ENDIF_INNER_OUTER_THEN]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_INNER_IF_OUTER_THEN]]
; GCN-NEXT: {{^}}[[ENDIF_OUTER]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_OUTER3]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @nested_if_else_if(i32 addrspace(1)* nocapture %arg) {
bb:
Expand Down Expand Up @@ -183,6 +177,33 @@ bb.outer.end:
ret void
}

; Negative test for the removal of instructions ahead of s_endpgm: the checks
; below require that the exec-restoring s_or_b64 at the endif label is still
; emitted when an s_barrier sits between it and s_endpgm.
; GCN-LABEL: {{^}}s_endpgm_unsafe_barrier:
; GCN: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]]
; GCN-NEXT: ; mask branch [[ENDIF:BB[0-9_]+]]
; GCN-NEXT: {{^BB[0-9_]+}}:
; GCN: store_dword
; GCN-NEXT: {{^}}[[ENDIF]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC]]
; GCN: s_barrier
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @s_endpgm_unsafe_barrier(i32 addrspace(1)* nocapture %arg) {
bb:
; Only work items whose x id is greater than 1 take the store path.
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
%tmp1 = icmp ugt i32 %tmp, 1
br i1 %tmp1, label %bb.then, label %bb.end

bb.then: ; preds = %bb
; Store zero to the element of %arg indexed by the work item id.
%tmp4 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %tmp
store i32 0, i32 addrspace(1)* %tmp4, align 4
br label %bb.end

bb.end: ; preds = %bb.then, %bb
; Both paths rejoin here and execute the barrier right before returning.
call void @llvm.amdgcn.s.barrier()
ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #0
declare void @llvm.amdgcn.s.barrier() #1

attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind convergent }

0 comments on commit a9487d9

Please sign in to comment.