Skip to content

Commit

Permalink
[NFC][AMDGPU] autogenerate kill-infinite-loop.ll checks
Browse files Browse the repository at this point in the history
This helps us track assembly changes in these tests.

Reviewed by: foad

Differential Revision: https://reviews.llvm.org/D105609
  • Loading branch information
ruiling committed Jul 14, 2021
1 parent 40e3df2 commit 1d9585c
Showing 1 changed file with 113 additions and 20 deletions.
133 changes: 113 additions & 20 deletions llvm/test/CodeGen/AMDGPU/kill-infinite-loop.ll
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -enable-var-scope %s

; Although it's modeled without any control flow in order to get better code
Expand All @@ -8,12 +9,41 @@
; this case right before the s_endpgm to avoid GPU hangs, which is what this
; tests.

; CHECK-LABEL: return_void
; Make sure that we remove the done bit from the original export
; CHECK: exp mrt0 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} vm
; CHECK: exp null off, off, off, off done vm
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @return_void(float %0) #0 {
; CHECK-LABEL: return_void:
; CHECK: ; %bb.0: ; %main_body
; CHECK-NEXT: s_mov_b64 s[0:1], exec
; CHECK-NEXT: s_mov_b32 s2, 0x41200000
; CHECK-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v0
; CHECK-NEXT: s_and_saveexec_b64 s[2:3], vcc
; CHECK-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
; CHECK-NEXT: s_cbranch_execz BB0_3
; CHECK-NEXT: BB0_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; CHECK-NEXT: s_cbranch_scc0 BB0_6
; CHECK-NEXT: ; %bb.2: ; %loop
; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: s_mov_b64 exec, 0
; CHECK-NEXT: s_mov_b64 vcc, 0
; CHECK-NEXT: s_branch BB0_1
; CHECK-NEXT: BB0_3: ; %Flow1
; CHECK-NEXT: s_or_saveexec_b64 s[0:1], s[2:3]
; CHECK-NEXT: s_xor_b64 exec, exec, s[0:1]
; CHECK-NEXT: s_cbranch_execz BB0_5
; CHECK-NEXT: ; %bb.4: ; %end
; CHECK-NEXT: v_mov_b32_e32 v0, 1.0
; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: exp mrt0 v1, v1, v1, v0 vm
; CHECK-NEXT: BB0_5: ; %UnifiedReturnBlock
; CHECK-NEXT: s_waitcnt expcnt(0)
; CHECK-NEXT: s_or_b64 exec, exec, s[0:1]
; CHECK-NEXT: exp null off, off, off, off done vm
; CHECK-NEXT: s_endpgm
; CHECK-NEXT: BB0_6:
; CHECK-NEXT: s_mov_b64 exec, 0
; CHECK-NEXT: exp null off, off, off, off done vm
; CHECK-NEXT: s_endpgm
main_body:
%cmp = fcmp olt float %0, 1.000000e+01
br i1 %cmp, label %end, label %loop
Expand All @@ -27,12 +57,40 @@ end:
ret void
}

; Check that we also remove the done bit from compressed exports correctly.
; CHECK-LABEL: return_void_compr
; CHECK: exp mrt0 v{{[0-9]+}}, off, v{{[0-9]+}}, off compr vm
; CHECK: exp null off, off, off, off done vm
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @return_void_compr(float %0) #0 {
; CHECK-LABEL: return_void_compr:
; CHECK: ; %bb.0: ; %main_body
; CHECK-NEXT: s_mov_b64 s[0:1], exec
; CHECK-NEXT: s_mov_b32 s2, 0x41200000
; CHECK-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v0
; CHECK-NEXT: s_and_saveexec_b64 s[2:3], vcc
; CHECK-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
; CHECK-NEXT: s_cbranch_execz BB1_3
; CHECK-NEXT: BB1_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; CHECK-NEXT: s_cbranch_scc0 BB1_6
; CHECK-NEXT: ; %bb.2: ; %loop
; CHECK-NEXT: ; in Loop: Header=BB1_1 Depth=1
; CHECK-NEXT: s_mov_b64 exec, 0
; CHECK-NEXT: s_mov_b64 vcc, 0
; CHECK-NEXT: s_branch BB1_1
; CHECK-NEXT: BB1_3: ; %Flow1
; CHECK-NEXT: s_or_saveexec_b64 s[0:1], s[2:3]
; CHECK-NEXT: s_xor_b64 exec, exec, s[0:1]
; CHECK-NEXT: s_cbranch_execz BB1_5
; CHECK-NEXT: ; %bb.4: ; %end
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: exp mrt0 v0, off, v0, off compr vm
; CHECK-NEXT: BB1_5: ; %UnifiedReturnBlock
; CHECK-NEXT: s_waitcnt expcnt(0)
; CHECK-NEXT: s_or_b64 exec, exec, s[0:1]
; CHECK-NEXT: exp null off, off, off, off done vm
; CHECK-NEXT: s_endpgm
; CHECK-NEXT: BB1_6:
; CHECK-NEXT: s_mov_b64 exec, 0
; CHECK-NEXT: exp null off, off, off, off done vm
; CHECK-NEXT: s_endpgm
main_body:
%cmp = fcmp olt float %0, 1.000000e+01
br i1 %cmp, label %end, label %loop
Expand All @@ -47,13 +105,26 @@ end:
}

; Test the case where there's only a kill in an infinite loop
; CHECK-LABEL: only_kill
; CHECK: exp null off, off, off, off done vm
; CHECK-NEXT: s_endpgm
; SILateBranchLowering inserts an extra null export here, but it should be harmless.
; CHECK: exp null off, off, off, off done vm
; CHECK-NEXT: s_endpgm
define amdgpu_ps void @only_kill() #0 {
; CHECK-LABEL: only_kill:
; CHECK: ; %bb.0: ; %main_body
; CHECK-NEXT: s_mov_b64 s[0:1], exec
; CHECK-NEXT: BB2_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; CHECK-NEXT: s_cbranch_scc0 BB2_4
; CHECK-NEXT: ; %bb.2: ; %loop
; CHECK-NEXT: ; in Loop: Header=BB2_1 Depth=1
; CHECK-NEXT: s_mov_b64 exec, 0
; CHECK-NEXT: s_mov_b64 vcc, exec
; CHECK-NEXT: s_cbranch_execnz BB2_1
; CHECK-NEXT: ; %bb.3: ; %UnifiedReturnBlock
; CHECK-NEXT: exp null off, off, off, off done vm
; CHECK-NEXT: s_endpgm
; CHECK-NEXT: BB2_4:
; CHECK-NEXT: s_mov_b64 exec, 0
; CHECK-NEXT: exp null off, off, off, off done vm
; CHECK-NEXT: s_endpgm
main_body:
br label %loop

Expand All @@ -63,11 +134,33 @@ loop:
}

; Check that the epilog is the final block
; CHECK-LABEL: return_nonvoid
; CHECK: exp null off, off, off, off done vm
; CHECK-NEXT: s_endpgm
; CHECK-NEXT: BB{{[0-9]+}}_{{[0-9]+}}:
define amdgpu_ps float @return_nonvoid(float %0) #0 {
; CHECK-LABEL: return_nonvoid:
; CHECK: ; %bb.0: ; %main_body
; CHECK-NEXT: s_mov_b64 s[0:1], exec
; CHECK-NEXT: s_mov_b32 s2, 0x41200000
; CHECK-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v0
; CHECK-NEXT: s_and_saveexec_b64 s[2:3], vcc
; CHECK-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
; CHECK-NEXT: s_cbranch_execz BB3_3
; CHECK-NEXT: BB3_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; CHECK-NEXT: s_cbranch_scc0 BB3_4
; CHECK-NEXT: ; %bb.2: ; %loop
; CHECK-NEXT: ; in Loop: Header=BB3_1 Depth=1
; CHECK-NEXT: s_mov_b64 exec, 0
; CHECK-NEXT: s_mov_b64 vcc, exec
; CHECK-NEXT: s_cbranch_execnz BB3_1
; CHECK-NEXT: BB3_3: ; %Flow1
; CHECK-NEXT: s_or_b64 exec, exec, s[2:3]
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: s_branch BB3_5
; CHECK-NEXT: BB3_4:
; CHECK-NEXT: s_mov_b64 exec, 0
; CHECK-NEXT: exp null off, off, off, off done vm
; CHECK-NEXT: s_endpgm
; CHECK-NEXT: BB3_5:
main_body:
%cmp = fcmp olt float %0, 1.000000e+01
br i1 %cmp, label %end, label %loop
Expand Down

0 comments on commit 1d9585c

Please sign in to comment.