Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
AMDGPU/SI: Handle infinite loop for the structurizer to work with CFG…
… with infinite loops. Summary: The current StructurizeCFG pass only works on a CFG with a single exit. AMDGPUUnifyDivergentExitNodes combines multiple "return" and/or "unreachable" blocks into one exit block so that the structurizer can work. However, an infinite loop is another special kind of "exit", and if we don't handle it, the resulting multiple exits will prevent the structurizer from working. In this change, for each infinite loop, we add a dummy edge to the "return" block, so that the AMDGPUUnifyDivergentExitNodes pass also works with infinite loops. This allows CFGs with infinite loops to be structurized. Reviewer: nhaehnle Differential Revision: https://reviews.llvm.org/D46340 llvm-svn: 332625
- Loading branch information
Changpeng Fang
committed
May 17, 2018
1 parent
daf5169
commit 391bcf8
Showing
6 changed files
with
202 additions
and
15 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,18 +1,167 @@ | ||
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s | ||
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s | ||
; RUN: opt -mtriple=amdgcn-- -S -amdgpu-unify-divergent-exit-nodes -verify %s | FileCheck -check-prefix=IR %s | ||
|
||
; SI-LABEL: {{^}}infinite_loop: | ||
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3e7 | ||
; SI: BB0_1: | ||
; SI: [[LOOP:BB[0-9]+_[0-9]+]]: ; %loop | ||
; SI: s_waitcnt lgkmcnt(0) | ||
; SI: buffer_store_dword [[REG]] | ||
; SI: s_branch BB0_1 | ||
; SI: s_branch [[LOOP]] | ||
; Kernel with no return: after entry, control reaches a block that stores | ||
; 999 to %out and then branches unconditionally back to itself. | ||
; NOTE(review): this listing appears to interleave removed and added diff | ||
; rows (the for.body label and branches sit next to the loop label and | ||
; branches) - confirm which side applies before treating this body as | ||
; valid LLVM assembly. | ||
define amdgpu_kernel void @infinite_loop(i32 addrspace(1)* %out) { | ||
entry: | ||
br label %for.body | ||
br label %loop | ||
|
||
for.body: ; preds = %entry, %for.body | ||
loop: | ||
store i32 999, i32 addrspace(1)* %out, align 4 | ||
br label %for.body | ||
br label %loop | ||
} | ||
|
||
|
||
; IR-LABEL: @infinite_loop_ret( | ||
; IR: br i1 %cond, label %loop, label %UnifiedReturnBlock | ||
|
||
; IR: loop: | ||
; IR: store i32 999, i32 addrspace(1)* %out, align 4 | ||
; IR: br i1 true, label %loop, label %UnifiedReturnBlock | ||
|
||
; IR: UnifiedReturnBlock: | ||
; IR: ret void | ||
|
||
|
||
; SI-LABEL: {{^}}infinite_loop_ret: | ||
; SI: s_cbranch_execz [[RET:BB[0-9]+_[0-9]+]] | ||
|
||
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3e7 | ||
; SI: [[LOOP:BB[0-9]+_[0-9]+]]: ; %loop | ||
; SI: s_and_b64 vcc, exec, -1 | ||
; SI: s_waitcnt lgkmcnt(0) | ||
; SI: buffer_store_dword [[REG]] | ||
; SI: s_cbranch_vccnz [[LOOP]] | ||
|
||
; SI: [[RET]]: ; %UnifiedReturnBlock | ||
; SI: s_endpgm | ||
; Kernel with one infinite loop and one return block. Work-items for which | ||
; llvm.amdgcn.workitem.id.x() returns 1 enter the loop; all others branch | ||
; directly to the return block. | ||
define amdgpu_kernel void @infinite_loop_ret(i32 addrspace(1)* %out) { | ||
entry: | ||
; Select the path by comparing the work-item x id against 1. | ||
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x() | ||
%cond = icmp eq i32 %tmp, 1 | ||
br i1 %cond, label %loop, label %return | ||
|
||
; Stores 999 to %out and branches back to itself; it has no edge to the | ||
; return block in this input. | ||
loop: | ||
store i32 999, i32 addrspace(1)* %out, align 4 | ||
br label %loop | ||
|
||
return: | ||
ret void | ||
} | ||
|
||
|
||
; IR-LABEL: @infinite_loops( | ||
; IR: br i1 undef, label %loop1, label %loop2 | ||
|
||
; IR: loop1: | ||
; IR: store i32 999, i32 addrspace(1)* %out, align 4 | ||
; IR: br i1 true, label %loop1, label %DummyReturnBlock | ||
|
||
; IR: loop2: | ||
; IR: store i32 888, i32 addrspace(1)* %out, align 4 | ||
; IR: br i1 true, label %loop2, label %DummyReturnBlock | ||
|
||
; IR: DummyReturnBlock: | ||
; IR: ret void | ||
|
||
|
||
; SI-LABEL: {{^}}infinite_loops: | ||
|
||
; SI: v_mov_b32_e32 [[REG1:v[0-9]+]], 0x3e7 | ||
; SI: s_and_b64 vcc, exec, -1 | ||
|
||
; SI: [[LOOP1:BB[0-9]+_[0-9]+]]: ; %loop1 | ||
; SI: s_waitcnt lgkmcnt(0) | ||
; SI: buffer_store_dword [[REG1]] | ||
; SI: s_cbranch_vccnz [[LOOP1]] | ||
; SI: s_branch [[RET:BB[0-9]+_[0-9]+]] | ||
|
||
; SI: v_mov_b32_e32 [[REG2:v[0-9]+]], 0x378 | ||
; SI: s_and_b64 vcc, exec, -1 | ||
|
||
; SI: [[LOOP2:BB[0-9]+_[0-9]+]]: ; %loop2 | ||
; SI: s_waitcnt lgkmcnt(0) | ||
; SI: buffer_store_dword [[REG2]] | ||
; SI: s_cbranch_vccnz [[LOOP2]] | ||
|
||
; SI: [[RET]]: ; %DummyReturnBlock | ||
; SI: s_endpgm | ||
; Kernel with two independent infinite loops and no return block at all. | ||
; The entry branch picks one of the two loops on an undef condition. | ||
define amdgpu_kernel void @infinite_loops(i32 addrspace(1)* %out) { | ||
entry: | ||
br i1 undef, label %loop1, label %loop2 | ||
|
||
; First self-loop: stores 999 to %out on every iteration. | ||
loop1: | ||
store i32 999, i32 addrspace(1)* %out, align 4 | ||
br label %loop1 | ||
|
||
; Second self-loop: stores 888 to %out on every iteration. | ||
loop2: | ||
store i32 888, i32 addrspace(1)* %out, align 4 | ||
br label %loop2 | ||
} | ||
|
||
|
||
|
||
; IR-LABEL: @infinite_loop_nest_ret( | ||
; IR: br i1 %cond1, label %outer_loop, label %UnifiedReturnBlock | ||
|
||
; IR: outer_loop: | ||
; IR: br label %inner_loop | ||
|
||
; IR: inner_loop: | ||
; IR: store i32 999, i32 addrspace(1)* %out, align 4 | ||
; IR: %cond3 = icmp eq i32 %tmp, 3 | ||
; IR: br i1 true, label %TransitionBlock, label %UnifiedReturnBlock | ||
|
||
; IR: TransitionBlock: | ||
; IR: br i1 %cond3, label %inner_loop, label %outer_loop | ||
|
||
; IR: UnifiedReturnBlock: | ||
; IR: ret void | ||
|
||
; SI-LABEL: {{^}}infinite_loop_nest_ret: | ||
; SI: s_cbranch_execz [[RET:BB[0-9]+_[0-9]+]] | ||
|
||
; SI: s_mov_b32 | ||
; SI: [[OUTER_LOOP:BB[0-9]+_[0-9]+]]: ; %outer_loop | ||
|
||
; SI: [[INNER_LOOP:BB[0-9]+_[0-9]+]]: ; %inner_loop | ||
; SI: s_waitcnt expcnt(0) | ||
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3e7 | ||
; SI: v_cmp_ne_u32_e32 | ||
; SI: s_waitcnt lgkmcnt(0) | ||
; SI: buffer_store_dword [[REG]] | ||
|
||
; SI: s_andn2_b64 exec | ||
; SI: s_cbranch_execnz [[INNER_LOOP]] | ||
|
||
; SI: s_andn2_b64 exec | ||
; SI: s_cbranch_execnz [[OUTER_LOOP]] | ||
|
||
; SI: [[RET]]: ; %UnifiedReturnBlock | ||
; SI: s_endpgm | ||
; Kernel with a nested pair of loops that never reach the return block. | ||
; Work-items for which llvm.amdgcn.workitem.id.x() returns 1 enter the | ||
; outer loop; all others branch directly to the return block. | ||
define amdgpu_kernel void @infinite_loop_nest_ret(i32 addrspace(1)* %out) { | ||
entry: | ||
%tmp = tail call i32 @llvm.amdgcn.workitem.id.x() | ||
%cond1 = icmp eq i32 %tmp, 1 | ||
br i1 %cond1, label %outer_loop, label %return | ||
|
||
; Outer loop header: falls straight through to the inner loop. The two | ||
; commented-out lines below are a disabled conditional back-edge. | ||
outer_loop: | ||
; %cond2 = icmp eq i32 %tmp, 2 | ||
; br i1 %cond2, label %outer_loop, label %inner_loop | ||
br label %inner_loop | ||
|
||
; Stores 999 to %out, then either repeats itself (x id == 3) or goes back | ||
; to the outer loop; neither successor leaves the loop nest. | ||
inner_loop: ; preds = %outer_loop, %inner_loop | ||
store i32 999, i32 addrspace(1)* %out, align 4 | ||
%cond3 = icmp eq i32 %tmp, 3 | ||
br i1 %cond3, label %inner_loop, label %outer_loop | ||
|
||
return: | ||
ret void | ||
} | ||
|
||
declare i32 @llvm.amdgcn.workitem.id.x() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters