48 changes: 24 additions & 24 deletions llvm/test/CodeGen/AMDGPU/bypass-div.ll
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ define i64 @sdiv64(i64 %a, i64 %b) {
; GFX9-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX9-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
; GFX9-NEXT: s_cbranch_execz BB0_2
; GFX9-NEXT: s_cbranch_execz .LBB0_2
; GFX9-NEXT: ; %bb.1:
; GFX9-NEXT: v_ashrrev_i32_e32 v4, 31, v3
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4
Expand Down Expand Up @@ -135,10 +135,10 @@ define i64 @sdiv64(i64 %a, i64 %b) {
; GFX9-NEXT: v_subb_co_u32_e32 v5, vcc, v0, v2, vcc
; GFX9-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX9-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX9-NEXT: BB0_2: ; %Flow
; GFX9-NEXT: .LBB0_2: ; %Flow
; GFX9-NEXT: s_or_saveexec_b64 s[4:5], s[6:7]
; GFX9-NEXT: s_xor_b64 exec, exec, s[4:5]
; GFX9-NEXT: s_cbranch_execz BB0_4
; GFX9-NEXT: s_cbranch_execz .LBB0_4
; GFX9-NEXT: ; %bb.3:
; GFX9-NEXT: v_cvt_f32_u32_e32 v1, v2
; GFX9-NEXT: v_sub_u32_e32 v3, 0, v2
Expand All @@ -160,7 +160,7 @@ define i64 @sdiv64(i64 %a, i64 %b) {
; GFX9-NEXT: v_add_u32_e32 v3, 1, v1
; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
; GFX9-NEXT: v_cndmask_b32_e32 v4, v1, v3, vcc
; GFX9-NEXT: BB0_4:
; GFX9-NEXT: .LBB0_4:
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-NEXT: v_mov_b32_e32 v0, v4
; GFX9-NEXT: v_mov_b32_e32 v1, v5
Expand All @@ -179,7 +179,7 @@ define i64 @udiv64(i64 %a, i64 %b) {
; GFX9-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX9-NEXT: s_xor_b64 s[6:7], exec, s[4:5]
; GFX9-NEXT: s_cbranch_execz BB1_2
; GFX9-NEXT: s_cbranch_execz .LBB1_2
; GFX9-NEXT: ; %bb.1:
; GFX9-NEXT: v_cvt_f32_u32_e32 v4, v2
; GFX9-NEXT: v_cvt_f32_u32_e32 v5, v3
Expand Down Expand Up @@ -285,10 +285,10 @@ define i64 @udiv64(i64 %a, i64 %b) {
; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc
; GFX9-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX9-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX9-NEXT: BB1_2: ; %Flow
; GFX9-NEXT: .LBB1_2: ; %Flow
; GFX9-NEXT: s_or_saveexec_b64 s[4:5], s[6:7]
; GFX9-NEXT: s_xor_b64 exec, exec, s[4:5]
; GFX9-NEXT: s_cbranch_execz BB1_4
; GFX9-NEXT: s_cbranch_execz .LBB1_4
; GFX9-NEXT: ; %bb.3:
; GFX9-NEXT: v_cvt_f32_u32_e32 v1, v2
; GFX9-NEXT: v_sub_u32_e32 v3, 0, v2
Expand All @@ -310,7 +310,7 @@ define i64 @udiv64(i64 %a, i64 %b) {
; GFX9-NEXT: v_add_u32_e32 v3, 1, v1
; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
; GFX9-NEXT: v_cndmask_b32_e32 v4, v1, v3, vcc
; GFX9-NEXT: BB1_4:
; GFX9-NEXT: .LBB1_4:
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-NEXT: v_mov_b32_e32 v0, v4
; GFX9-NEXT: v_mov_b32_e32 v1, v5
Expand All @@ -329,7 +329,7 @@ define i64 @srem64(i64 %a, i64 %b) {
; GFX9-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX9-NEXT: s_xor_b64 s[8:9], exec, s[4:5]
; GFX9-NEXT: s_cbranch_execz BB2_2
; GFX9-NEXT: s_cbranch_execz .LBB2_2
; GFX9-NEXT: ; %bb.1:
; GFX9-NEXT: v_ashrrev_i32_e32 v4, 31, v3
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4
Expand Down Expand Up @@ -448,10 +448,10 @@ define i64 @srem64(i64 %a, i64 %b) {
; GFX9-NEXT: v_subb_co_u32_e32 v5, vcc, v1, v6, vcc
; GFX9-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX9-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX9-NEXT: BB2_2: ; %Flow
; GFX9-NEXT: .LBB2_2: ; %Flow
; GFX9-NEXT: s_or_saveexec_b64 s[4:5], s[8:9]
; GFX9-NEXT: s_xor_b64 exec, exec, s[4:5]
; GFX9-NEXT: s_cbranch_execz BB2_4
; GFX9-NEXT: s_cbranch_execz .LBB2_4
; GFX9-NEXT: ; %bb.3:
; GFX9-NEXT: v_cvt_f32_u32_e32 v1, v2
; GFX9-NEXT: v_sub_u32_e32 v3, 0, v2
Expand All @@ -471,7 +471,7 @@ define i64 @srem64(i64 %a, i64 %b) {
; GFX9-NEXT: v_sub_u32_e32 v1, v0, v2
; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
; GFX9-NEXT: v_cndmask_b32_e32 v4, v0, v1, vcc
; GFX9-NEXT: BB2_4:
; GFX9-NEXT: .LBB2_4:
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-NEXT: v_mov_b32_e32 v0, v4
; GFX9-NEXT: v_mov_b32_e32 v1, v5
Expand All @@ -490,7 +490,7 @@ define i64 @urem64(i64 %a, i64 %b) {
; GFX9-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX9-NEXT: s_xor_b64 s[8:9], exec, s[4:5]
; GFX9-NEXT: s_cbranch_execz BB3_2
; GFX9-NEXT: s_cbranch_execz .LBB3_2
; GFX9-NEXT: ; %bb.1:
; GFX9-NEXT: v_cvt_f32_u32_e32 v4, v2
; GFX9-NEXT: v_cvt_f32_u32_e32 v5, v3
Expand Down Expand Up @@ -595,10 +595,10 @@ define i64 @urem64(i64 %a, i64 %b) {
; GFX9-NEXT: v_cndmask_b32_e32 v4, v0, v1, vcc
; GFX9-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX9-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX9-NEXT: BB3_2: ; %Flow
; GFX9-NEXT: .LBB3_2: ; %Flow
; GFX9-NEXT: s_or_saveexec_b64 s[4:5], s[8:9]
; GFX9-NEXT: s_xor_b64 exec, exec, s[4:5]
; GFX9-NEXT: s_cbranch_execz BB3_4
; GFX9-NEXT: s_cbranch_execz .LBB3_4
; GFX9-NEXT: ; %bb.3:
; GFX9-NEXT: v_cvt_f32_u32_e32 v1, v2
; GFX9-NEXT: v_sub_u32_e32 v3, 0, v2
Expand All @@ -618,7 +618,7 @@ define i64 @urem64(i64 %a, i64 %b) {
; GFX9-NEXT: v_sub_u32_e32 v1, v0, v2
; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
; GFX9-NEXT: v_cndmask_b32_e32 v4, v0, v1, vcc
; GFX9-NEXT: BB3_4:
; GFX9-NEXT: .LBB3_4:
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-NEXT: v_mov_b32_e32 v0, v4
; GFX9-NEXT: v_mov_b32_e32 v1, v5
Expand Down Expand Up @@ -763,7 +763,7 @@ define <2 x i64> @sdivrem64(i64 %a, i64 %b) {
; GFX9-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX9-NEXT: s_xor_b64 s[10:11], exec, s[4:5]
; GFX9-NEXT: s_cbranch_execz BB8_2
; GFX9-NEXT: s_cbranch_execz .LBB8_2
; GFX9-NEXT: ; %bb.1:
; GFX9-NEXT: v_ashrrev_i32_e32 v4, 31, v3
; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4
Expand Down Expand Up @@ -895,10 +895,10 @@ define <2 x i64> @sdivrem64(i64 %a, i64 %b) {
; GFX9-NEXT: v_subb_co_u32_e32 v7, vcc, v1, v7, vcc
; GFX9-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX9-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX9-NEXT: BB8_2: ; %Flow
; GFX9-NEXT: .LBB8_2: ; %Flow
; GFX9-NEXT: s_or_saveexec_b64 s[4:5], s[10:11]
; GFX9-NEXT: s_xor_b64 exec, exec, s[4:5]
; GFX9-NEXT: s_cbranch_execz BB8_4
; GFX9-NEXT: s_cbranch_execz .LBB8_4
; GFX9-NEXT: ; %bb.3:
; GFX9-NEXT: v_cvt_f32_u32_e32 v1, v2
; GFX9-NEXT: v_sub_u32_e32 v3, 0, v2
Expand All @@ -923,7 +923,7 @@ define <2 x i64> @sdivrem64(i64 %a, i64 %b) {
; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
; GFX9-NEXT: v_cndmask_b32_e32 v6, v0, v3, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v4, v1, v4, vcc
; GFX9-NEXT: BB8_4:
; GFX9-NEXT: .LBB8_4:
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-NEXT: v_mov_b32_e32 v0, v4
; GFX9-NEXT: v_mov_b32_e32 v1, v5
Expand All @@ -948,7 +948,7 @@ define <2 x i64> @udivrem64(i64 %a, i64 %b) {
; GFX9-NEXT: ; implicit-def: $vgpr4_vgpr5
; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GFX9-NEXT: s_xor_b64 s[8:9], exec, s[4:5]
; GFX9-NEXT: s_cbranch_execz BB9_2
; GFX9-NEXT: s_cbranch_execz .LBB9_2
; GFX9-NEXT: ; %bb.1:
; GFX9-NEXT: v_cvt_f32_u32_e32 v4, v2
; GFX9-NEXT: v_cvt_f32_u32_e32 v5, v3
Expand Down Expand Up @@ -1061,10 +1061,10 @@ define <2 x i64> @udivrem64(i64 %a, i64 %b) {
; GFX9-NEXT: v_cndmask_b32_e32 v6, v0, v1, vcc
; GFX9-NEXT: ; implicit-def: $vgpr2_vgpr3
; GFX9-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX9-NEXT: BB9_2: ; %Flow
; GFX9-NEXT: .LBB9_2: ; %Flow
; GFX9-NEXT: s_or_saveexec_b64 s[4:5], s[8:9]
; GFX9-NEXT: s_xor_b64 exec, exec, s[4:5]
; GFX9-NEXT: s_cbranch_execz BB9_4
; GFX9-NEXT: s_cbranch_execz .LBB9_4
; GFX9-NEXT: ; %bb.3:
; GFX9-NEXT: v_cvt_f32_u32_e32 v1, v2
; GFX9-NEXT: v_sub_u32_e32 v3, 0, v2
Expand All @@ -1089,7 +1089,7 @@ define <2 x i64> @udivrem64(i64 %a, i64 %b) {
; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2
; GFX9-NEXT: v_cndmask_b32_e32 v6, v0, v3, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v4, v1, v4, vcc
; GFX9-NEXT: BB9_4:
; GFX9-NEXT: .LBB9_4:
; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX9-NEXT: v_mov_b32_e32 v0, v4
; GFX9-NEXT: v_mov_b32_e32 v1, v5
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
; GCN-NOT: s_sub_u32

; GCN: s_and_saveexec_b64
; GCN: s_cbranch_execz [[BB1:BB[0-9]+_[0-9]+]]
; GCN: s_cbranch_execz [[BB1:.LBB[0-9]+_[0-9]+]]

; GCN: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s32 glc{{$}}
; GCN-NOT: s32
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AMDGPU/call-skip.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ define hidden void @func() #1 {

; GCN-LABEL: {{^}}if_call:
; GCN: s_and_saveexec_b64
; GCN-NEXT: s_cbranch_execz [[END:BB[0-9]+_[0-9]+]]
; GCN-NEXT: s_cbranch_execz [[END:.LBB[0-9]+_[0-9]+]]
; GCN: s_swappc_b64
; GCN: [[END]]:
define void @if_call(i32 %flag) #0 {
Expand All @@ -25,7 +25,7 @@ end:

; GCN-LABEL: {{^}}if_asm:
; GCN: s_and_saveexec_b64
; GCN-NEXT: s_cbranch_execz [[END:BB[0-9]+_[0-9]+]]
; GCN-NEXT: s_cbranch_execz [[END:.LBB[0-9]+_[0-9]+]]
; GCN: ; sample asm
; GCN: [[END]]:
define void @if_asm(i32 %flag) #0 {
Expand All @@ -42,7 +42,7 @@ end:

; GCN-LABEL: {{^}}if_call_kernel:
; GCN: s_and_saveexec_b64
; GCN-NEXT: s_cbranch_execz BB3_2
; GCN-NEXT: s_cbranch_execz .LBB3_2
; GCN: s_swappc_b64
define amdgpu_kernel void @if_call_kernel() #0 {
%id = call i32 @llvm.amdgcn.workitem.id.x()
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AMDGPU/cf-loop-on-constant.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

; GCN-LABEL: {{^}}test_loop:
; GCN: s_and_b64 s[0:1], exec, -1
; GCN: [[LABEL:BB[0-9]+_[0-9]+]]: ; %for.body{{$}}
; GCN: [[LABEL:.LBB[0-9]+_[0-9]+]]: ; %for.body{{$}}
; GCN: ds_read_b32
; GCN: ds_write_b32
; GCN: s_cbranch_vccnz [[LABEL]]
Expand All @@ -28,7 +28,7 @@ for.body:
}

; GCN-LABEL: @loop_const_true
; GCN: [[LABEL:BB[0-9]+_[0-9]+]]:
; GCN: [[LABEL:.LBB[0-9]+_[0-9]+]]:
; GCN: ds_read_b32
; GCN: ds_write_b32
; GCN: s_branch [[LABEL]]
Expand Down Expand Up @@ -99,7 +99,7 @@ for.body:
; GCN: v_cmp_eq_u32{{[^,]*}}, 1,
; GCN: s_add_i32 s2, s0, 0x80

; GCN: [[LOOPBB:BB[0-9]+_[0-9]+]]
; GCN: [[LOOPBB:.LBB[0-9]+_[0-9]+]]
; GCN: _add_i32_e32 v0, vcc, 4, v0

; GCN: s_cbranch_{{vccz|vccnz}} [[LOOPBB]]
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-flat.ll
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

; GCN-LABEL: {{^}}test_no_sink_flat_small_offset_i32:
; GCN: flat_load_dword
; GCN: {{^}}BB0_2:
; GCN: {{^}}.LBB0_2:
define amdgpu_kernel void @test_no_sink_flat_small_offset_i32(i32* %out, i32* %in, i32 %cond) {
entry:
%out.gep = getelementptr i32, i32* %out, i64 999999
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx1030.ll
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,13 @@ define amdgpu_kernel void @test_sink_small_offset_global_atomic_csub_i32(i32 add
; GCN-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_and_saveexec_b32 s4, vcc_lo
; GCN-NEXT: s_cbranch_execz BB0_2
; GCN-NEXT: s_cbranch_execz .LBB0_2
; GCN-NEXT: ; %bb.1: ; %if
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: v_mov_b32_e32 v1, 2
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: global_atomic_csub v0, v0, v1, s[2:3] offset:28 glc
; GCN-NEXT: BB0_2: ; %endif
; GCN-NEXT: .LBB0_2: ; %endif
; GCN-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GCN-NEXT: v_mov_b32_e32 v1, 0x3d0800
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,15 @@ define amdgpu_kernel void @test_sink_small_offset_global_atomic_fadd_f32(float a
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN-NEXT: s_cbranch_execz BB0_2
; GCN-NEXT: s_cbranch_execz .LBB0_2
; GCN-NEXT: ; %bb.1: ; %if
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: v_mov_b32_e32 v1, 2.0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: global_atomic_add_f32 v0, v1, s[2:3] offset:28
; GCN-NEXT: global_load_dword v0, v[0:1], off glc
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: BB0_2: ; %endif
; GCN-NEXT: .LBB0_2: ; %endif
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
; GCN-NEXT: v_mov_b32_e32 v1, 0x3d0000
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
Expand Down
14 changes: 7 additions & 7 deletions llvm/test/CodeGen/AMDGPU/cgp-addressing-modes.ll
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ done:

; GFX9: v_mov_b32_e32 [[VOFFSET:v[0-9]+]], 0xf000{{$}}
; GFX9: global_load_sbyte {{v[0-9]+}}, [[VOFFSET]], {{s\[[0-9]+:[0-9]+\]}} offset:4095{{$}}
; GCN: {{^}}BB1_2:
; GCN: {{^}}.LBB1_2:
; GCN: s_or_b64 exec
define amdgpu_kernel void @test_sink_global_small_max_i32_ds_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
Expand Down Expand Up @@ -76,7 +76,7 @@ done:
; SICIVI: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:4095{{$}}
; GFX9: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; GFX9: global_load_sbyte {{v[0-9]+}}, [[ZERO]], {{s\[[0-9]+:[0-9]+\]}} offset:4095{{$}}
; GCN: {{^}}BB2_2:
; GCN: {{^}}.LBB2_2:
; GCN: s_or_b64 exec
define amdgpu_kernel void @test_sink_global_small_max_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
Expand Down Expand Up @@ -105,7 +105,7 @@ done:
; SICIVI: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
; GFX9: v_mov_b32_e32 [[VOFFSET:v[0-9]+]], 0x1000{{$}}
; GFX9: global_load_sbyte {{v[0-9]+}}, [[VOFFSET]], {{s\[[0-9]+:[0-9]+\]$}}
; GCN: {{^}}BB3_2:
; GCN: {{^}}.LBB3_2:
; GCN: s_or_b64 exec
define amdgpu_kernel void @test_sink_global_small_max_plus_1_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
Expand Down Expand Up @@ -138,7 +138,7 @@ done:
; GCN: s_and_saveexec_b64
; GCN: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:4092{{$}}
; GCN: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:4092 glc{{$}}
; GCN: {{^}}BB4_2:
; GCN: {{^}}.LBB4_2:
define amdgpu_kernel void @test_sink_scratch_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) {
entry:
%alloca = alloca [512 x i32], align 4, addrspace(5)
Expand Down Expand Up @@ -178,7 +178,7 @@ done:
; GCN: buffer_store_dword {{v[0-9]+}}, [[BASE_FI0]], {{s\[[0-9]+:[0-9]+\]}}, 0 offen offset:4092{{$}}
; GCN: v_mov_b32_e32 [[BASE_FI1:v[0-9]+]], 4
; GCN: buffer_load_dword {{v[0-9]+}}, [[BASE_FI1]], {{s\[[0-9]+:[0-9]+\]}}, 0 offen offset:4092 glc{{$}}
; GCN: {{^BB[0-9]+}}_2:
; GCN: {{^.LBB[0-9]+}}_2:

define amdgpu_kernel void @test_sink_scratch_small_offset_i32_reserved(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) {
entry:
Expand Down Expand Up @@ -216,7 +216,7 @@ done:
; GCN: s_and_saveexec_b64
; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen{{$}}
; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen glc{{$}}
; GCN: {{^BB[0-9]+}}_2:
; GCN: {{^.LBB[0-9]+}}_2:
define amdgpu_kernel void @test_no_sink_scratch_large_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) {
entry:
%alloca = alloca [512 x i32], align 4, addrspace(5)
Expand Down Expand Up @@ -248,7 +248,7 @@ done:
; GCN: s_and_saveexec_b64
; CI: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; VI: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
; GCN: {{^BB[0-9]+}}_2:
; GCN: {{^.LBB[0-9]+}}_2:
define amdgpu_kernel void @test_sink_global_vreg_sreg_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %offset) {
entry:
%offset.ext = zext i32 %offset to i64
Expand Down
18 changes: 9 additions & 9 deletions llvm/test/CodeGen/AMDGPU/cgp-bitfield-extract.ll
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
; GCN: s_cbranch_scc{{[0-1]}}

; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70008
; GCN: BB0_3:
; GCN: .LBB0_3:
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80008

; GCN: buffer_store_dword
Expand Down Expand Up @@ -115,7 +115,7 @@ ret:
; OPT: store
; OPT: ret

; For GFX8: since i16 is legal type, we cannot sink lshr into BBs.
; For GFX8: since i16 is a legal type, we cannot sink lshr into BBs.

; GCN-LABEL: {{^}}sink_ubfe_i16:
; GCN-NOT: lshr
Expand All @@ -126,7 +126,7 @@ ret:
; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70004
; VI: v_mov_b32_e32 v{{[0-9]+}}, 0x7f

; GCN: BB2_3:
; GCN: .LBB2_3:
; SI: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80004
; VI: v_mov_b32_e32 v{{[0-9]+}}, 0xff

Expand Down Expand Up @@ -175,11 +175,11 @@ ret:

; GCN-LABEL: {{^}}sink_ubfe_i64_span_midpoint:

; GCN: s_cbranch_scc{{[0-1]}} BB3_2
; GCN: s_cbranch_scc{{[0-1]}} .LBB3_2
; GCN: v_alignbit_b32 v[[LO:[0-9]+]], s{{[0-9]+}}, v{{[0-9]+}}, 30
; GCN: v_and_b32_e32 v{{[0-9]+}}, 0x7f, v[[LO]]

; GCN: BB3_3:
; GCN: .LBB3_3:
; GCN: v_and_b32_e32 v{{[0-9]+}}, 0xff, v[[LO]]

; GCN: buffer_store_dwordx2
Expand Down Expand Up @@ -223,11 +223,11 @@ ret:

; GCN-LABEL: {{^}}sink_ubfe_i64_low32:

; GCN: s_cbranch_scc{{[0-1]}} BB4_2
; GCN: s_cbranch_scc{{[0-1]}} .LBB4_2

; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x7000f

; GCN: BB4_3:
; GCN: .LBB4_3:
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x8000f

; GCN: buffer_store_dwordx2
Expand Down Expand Up @@ -270,10 +270,10 @@ ret:
; OPT: ret

; GCN-LABEL: {{^}}sink_ubfe_i64_high32:
; GCN: s_cbranch_scc{{[0-1]}} BB5_2
; GCN: s_cbranch_scc{{[0-1]}} .LBB5_2
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x70003

; GCN: BB5_3:
; GCN: .LBB5_3:
; GCN: s_bfe_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0x80003

; GCN: buffer_store_dwordx2
Expand Down
26 changes: 13 additions & 13 deletions llvm/test/CodeGen/AMDGPU/collapse-endcf.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

; GCN-LABEL: {{^}}simple_nested_if:
; GCN: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]]
; GCN-NEXT: s_cbranch_execz [[ENDIF:BB[0-9_]+]]
; GCN-NEXT: s_cbranch_execz [[ENDIF:.LBB[0-9_]+]]
; GCN: s_and_b64 exec, exec, vcc
; GCN-NEXT: s_cbranch_execz [[ENDIF]]
; GCN-NEXT: ; %bb.{{[0-9]+}}:
Expand Down Expand Up @@ -37,9 +37,9 @@ bb.outer.end: ; preds = %bb.outer.then, %bb.

; GCN-LABEL: {{^}}uncollapsable_nested_if:
; GCN: s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER:BB[0-9_]+]]
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER:.LBB[0-9_]+]]
; GCN: s_and_saveexec_b64 [[SAVEEXEC_INNER:s\[[0-9:]+\]]]
; GCN-NEXT: s_cbranch_execz [[ENDIF_INNER:BB[0-9_]+]]
; GCN-NEXT: s_cbranch_execz [[ENDIF_INNER:.LBB[0-9_]+]]
; GCN-NEXT: ; %bb.{{[0-9]+}}:
; GCN: store_dword
; GCN-NEXT: {{^}}[[ENDIF_INNER]]:
Expand Down Expand Up @@ -80,10 +80,10 @@ bb.outer.end: ; preds = %bb.inner.then, %bb

; GCN-LABEL: {{^}}nested_if_if_else:
; GCN: s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER:BB[0-9_]+]]
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER:.LBB[0-9_]+]]
; GCN: s_and_saveexec_b64 [[SAVEEXEC_INNER:s\[[0-9:]+\]]]
; GCN-NEXT: s_xor_b64 [[SAVEEXEC_INNER2:s\[[0-9:]+\]]], exec, [[SAVEEXEC_INNER]]
; GCN-NEXT: s_cbranch_execz [[THEN_INNER:BB[0-9_]+]]
; GCN-NEXT: s_cbranch_execz [[THEN_INNER:.LBB[0-9_]+]]
; GCN-NEXT: ; %bb.{{[0-9]+}}:
; GCN: store_dword
; GCN: {{^}}[[THEN_INNER]]:
Expand Down Expand Up @@ -127,23 +127,23 @@ bb.outer.end: ; preds = %bb, %bb.then, %b
; GCN-LABEL: {{^}}nested_if_else_if:
; GCN: s_and_saveexec_b64 [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
; GCN-NEXT: s_xor_b64 [[SAVEEXEC_OUTER2:s\[[0-9:]+\]]], exec, [[SAVEEXEC_OUTER]]
; GCN-NEXT: s_cbranch_execz [[THEN_OUTER:BB[0-9_]+]]
; GCN-NEXT: s_cbranch_execz [[THEN_OUTER:.LBB[0-9_]+]]
; GCN-NEXT: ; %bb.{{[0-9]+}}:
; GCN: store_dword
; GCN-NEXT: s_and_saveexec_b64 [[SAVEEXEC_INNER_IF_OUTER_ELSE:s\[[0-9:]+\]]]
; GCN-NEXT: s_cbranch_execz [[THEN_OUTER_FLOW:BB[0-9_]+]]
; GCN-NEXT: s_cbranch_execz [[THEN_OUTER_FLOW:.LBB[0-9_]+]]
; GCN-NEXT: ; %bb.{{[0-9]+}}:
; GCN: store_dword
; GCN-NEXT: {{^}}[[THEN_OUTER_FLOW]]:
; GCN-NEXT: s_or_b64 exec, exec, [[SAVEEXEC_INNER_IF_OUTER_ELSE]]
; GCN: {{^}}[[THEN_OUTER]]:
; GCN-NEXT: s_or_saveexec_b64 [[SAVEEXEC_OUTER3:s\[[0-9:]+\]]], [[SAVEEXEC_OUTER2]]
; GCN-NEXT: s_xor_b64 exec, exec, [[SAVEEXEC_OUTER3]]
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER:BB[0-9_]+]]
; GCN-NEXT: s_cbranch_execz [[ENDIF_OUTER:.LBB[0-9_]+]]
; GCN-NEXT: ; %bb.{{[0-9]+}}:
; GCN: store_dword
; GCN-NEXT: s_and_saveexec_b64 [[SAVEEXEC_ELSE:s\[[0-9:]+\]]],
; GCN-NEXT: s_cbranch_execz [[FLOW1:BB[0-9_]+]]
; GCN-NEXT: s_cbranch_execz [[FLOW1:.LBB[0-9_]+]]
; GCN-NEXT: ; %bb.{{[0-9]+}}:
; GCN: store_dword
; GCN-NEXT: [[FLOW1]]:
Expand Down Expand Up @@ -188,7 +188,7 @@ bb.outer.end:

; GCN-LABEL: {{^}}s_endpgm_unsafe_barrier:
; GCN: s_and_saveexec_b64 [[SAVEEXEC:s\[[0-9:]+\]]]
; GCN-NEXT: s_cbranch_execz [[ENDIF:BB[0-9_]+]]
; GCN-NEXT: s_cbranch_execz [[ENDIF:.LBB[0-9_]+]]
; GCN-NEXT: ; %bb.{{[0-9]+}}:
; GCN: store_dword
; GCN-NEXT: {{^}}[[ENDIF]]:
Expand All @@ -213,15 +213,15 @@ bb.end: ; preds = %bb.then, %bb

; GCN-LABEL: {{^}}scc_liveness:

; GCN: [[BB1_OUTER_LOOP:BB[0-9]+_[0-9]+]]:
; GCN: [[BB1_OUTER_LOOP:.LBB[0-9]+_[0-9]+]]:
; GCN: s_or_b64 exec, exec, [[SAVEEXEC_OUTER:s\[[0-9:]+\]]]
;
; GCN: [[BB1_INNER_LOOP:BB[0-9]+_[0-9]+]]:
; GCN: [[BB1_INNER_LOOP:.LBB[0-9]+_[0-9]+]]:
; GCN: s_or_b64 exec, exec, s{{\[[0-9]+:[0-9]+\]}}
; GCN: s_andn2_b64
; GCN-NEXT: s_cbranch_execz

; GCN: [[BB1_LOOP:BB[0-9]+_[0-9]+]]:
; GCN: [[BB1_LOOP:.LBB[0-9]+_[0-9]+]]:
; GCN: s_andn2_b64 exec, exec,
; GCN-NEXT: s_cbranch_execnz [[BB1_LOOP]]

Expand Down
14 changes: 7 additions & 7 deletions llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, [[CMP0]]
; GCN: s_mov_b64 exec, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}

; GCN: s_cbranch_execz [[ENDIF:BB[0-9]+_[0-9]+]]
; GCN: s_cbranch_execz [[ENDIF:.LBB[0-9]+_[0-9]+]]

; GCN: ; %bb.{{[0-9]+}}: ; %if
; GCN: buffer_load_dword [[RELOAD_LOAD0:v[0-9]+]], off, s[0:3], 0 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload
Expand Down Expand Up @@ -105,10 +105,10 @@ endif:

; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, [[CMP0]]
; GCN: s_mov_b64 exec, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}
; GCN-NEXT: s_cbranch_execz [[END:BB[0-9]+_[0-9]+]]
; GCN-NEXT: s_cbranch_execz [[END:.LBB[0-9]+_[0-9]+]]


; GCN: [[LOOP:BB[0-9]+_[0-9]+]]:
; GCN: [[LOOP:.LBB[0-9]+_[0-9]+]]:
; GCN: buffer_load_dword v[[VAL_LOOP_RELOAD:[0-9]+]], off, s[0:3], 0 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload
; GCN: v_subrev_i32_e32 [[VAL_LOOP:v[0-9]+]], vcc, v{{[0-9]+}}, v[[VAL_LOOP_RELOAD]]
; GCN: s_cmp_lg_u32
Expand Down Expand Up @@ -183,8 +183,8 @@ end:
; GCN: s_mov_b64 exec, [[CMP0]]

; FIXME: It makes no sense to put this skip here
; GCN: s_cbranch_execz [[FLOW:BB[0-9]+_[0-9]+]]
; GCN-NEXT: s_branch [[ELSE:BB[0-9]+_[0-9]+]]
; GCN: s_cbranch_execz [[FLOW:.LBB[0-9]+_[0-9]+]]
; GCN-NEXT: s_branch [[ELSE:.LBB[0-9]+_[0-9]+]]

; GCN: [[FLOW]]: ; %Flow
; VGPR: v_readlane_b32 s[[FLOW_S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]]
Expand Down Expand Up @@ -213,15 +213,15 @@ end:
; VMEM: buffer_store_dword v[[FLOW_V_SAVEEXEC]], off, s[0:3], 0 offset:[[FLOW_SAVEEXEC_OFFSET:[0-9]+]] ; 4-byte Folded Spill

; GCN: s_xor_b64 exec, exec, s{{\[}}[[FLOW_AND_EXEC_LO]]:[[FLOW_AND_EXEC_HI]]{{\]}}
; GCN-NEXT: s_cbranch_execz [[ENDIF:BB[0-9]+_[0-9]+]]
; GCN-NEXT: s_cbranch_execz [[ENDIF:.LBB[0-9]+_[0-9]+]]


; GCN: ; %bb.{{[0-9]+}}: ; %if
; GCN: buffer_load_dword v[[LOAD0_RELOAD:[0-9]+]], off, s[0:3], 0 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload
; GCN: ds_read_b32
; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, v{{[0-9]+}}, v[[LOAD0_RELOAD]]
; GCN: buffer_store_dword [[ADD]], off, s[0:3], 0 offset:[[RESULT_OFFSET]] ; 4-byte Folded Spill
; GCN-NEXT: s_branch [[ENDIF:BB[0-9]+_[0-9]+]]
; GCN-NEXT: s_branch [[ENDIF:.LBB[0-9]+_[0-9]+]]

; GCN: [[ELSE]]: ; %else
; GCN: buffer_load_dword v[[LOAD0_RELOAD:[0-9]+]], off, s[0:3], 0 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/AMDGPU/cross-block-use-is-not-abi-copy.ll
Original file line number Diff line number Diff line change
Expand Up @@ -178,19 +178,19 @@ define amdgpu_kernel void @v3i16_registers(i1 %cond) #0 {
; GCN-NEXT: s_cselect_b64 s[4:5], -1, 0
; GCN-NEXT: s_and_b64 vcc, exec, s[4:5]
; GCN-NEXT: s_mov_b32 s32, 0
; GCN-NEXT: s_cbranch_vccnz BB4_2
; GCN-NEXT: s_cbranch_vccnz .LBB4_2
; GCN-NEXT: ; %bb.1: ; %if.else
; GCN-NEXT: s_getpc_b64 s[4:5]
; GCN-NEXT: s_add_u32 s4, s4, func_v3i16@rel32@lo+4
; GCN-NEXT: s_addc_u32 s5, s5, func_v3i16@rel32@hi+12
; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GCN-NEXT: s_branch BB4_3
; GCN-NEXT: BB4_2:
; GCN-NEXT: s_branch .LBB4_3
; GCN-NEXT: .LBB4_2:
; GCN-NEXT: s_mov_b32 s4, 0
; GCN-NEXT: s_mov_b32 s5, s4
; GCN-NEXT: v_mov_b32_e32 v0, s4
; GCN-NEXT: v_mov_b32_e32 v1, s5
; GCN-NEXT: BB4_3: ; %if.end
; GCN-NEXT: .LBB4_3: ; %if.end
; GCN-NEXT: global_store_short v[0:1], v1, off
; GCN-NEXT: global_store_dword v[0:1], v0, off
; GCN-NEXT: s_endpgm
Expand Down Expand Up @@ -223,19 +223,19 @@ define amdgpu_kernel void @v3f16_registers(i1 %cond) #0 {
; GCN-NEXT: s_cselect_b64 s[4:5], -1, 0
; GCN-NEXT: s_and_b64 vcc, exec, s[4:5]
; GCN-NEXT: s_mov_b32 s32, 0
; GCN-NEXT: s_cbranch_vccnz BB5_2
; GCN-NEXT: s_cbranch_vccnz .LBB5_2
; GCN-NEXT: ; %bb.1: ; %if.else
; GCN-NEXT: s_getpc_b64 s[4:5]
; GCN-NEXT: s_add_u32 s4, s4, func_v3f16@rel32@lo+4
; GCN-NEXT: s_addc_u32 s5, s5, func_v3f16@rel32@hi+12
; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GCN-NEXT: s_branch BB5_3
; GCN-NEXT: BB5_2:
; GCN-NEXT: s_branch .LBB5_3
; GCN-NEXT: .LBB5_2:
; GCN-NEXT: s_mov_b32 s4, 0
; GCN-NEXT: s_mov_b32 s5, s4
; GCN-NEXT: v_mov_b32_e32 v0, s4
; GCN-NEXT: v_mov_b32_e32 v1, s5
; GCN-NEXT: BB5_3: ; %if.end
; GCN-NEXT: .LBB5_3: ; %if.end
; GCN-NEXT: global_store_short v[0:1], v1, off
; GCN-NEXT: global_store_dword v[0:1], v0, off
; GCN-NEXT: s_endpgm
Expand Down
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/AMDGPU/ctpop16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1490,7 +1490,7 @@ define amdgpu_kernel void @ctpop_i16_in_br(i16 addrspace(1)* %out, i16 addrspace
; SI-NEXT: s_lshr_b32 s2, s4, 16
; SI-NEXT: s_cmp_lg_u32 s2, 0
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT: s_cbranch_scc0 BB14_2
; SI-NEXT: s_cbranch_scc0 .LBB14_2
; SI-NEXT: ; %bb.1: ; %else
; SI-NEXT: s_mov_b32 s11, 0xf000
; SI-NEXT: s_mov_b32 s10, -1
Expand All @@ -1499,18 +1499,18 @@ define amdgpu_kernel void @ctpop_i16_in_br(i16 addrspace(1)* %out, i16 addrspace
; SI-NEXT: s_mov_b32 s9, s3
; SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 offset:2
; SI-NEXT: s_mov_b64 s[2:3], 0
; SI-NEXT: s_cbranch_execz BB14_3
; SI-NEXT: s_branch BB14_4
; SI-NEXT: BB14_2:
; SI-NEXT: s_cbranch_execz .LBB14_3
; SI-NEXT: s_branch .LBB14_4
; SI-NEXT: .LBB14_2:
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b64 s[2:3], -1
; SI-NEXT: v_mov_b32_e32 v0, 0
; SI-NEXT: BB14_3: ; %if
; SI-NEXT: .LBB14_3: ; %if
; SI-NEXT: s_and_b32 s2, s4, 0xffff
; SI-NEXT: s_bcnt1_i32_b32 s2, s2
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: v_mov_b32_e32 v0, s2
; SI-NEXT: BB14_4: ; %endif
; SI-NEXT: .LBB14_4: ; %endif
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt vmcnt(0)
Expand All @@ -1525,23 +1525,23 @@ define amdgpu_kernel void @ctpop_i16_in_br(i16 addrspace(1)* %out, i16 addrspace
; VI-NEXT: s_lshr_b32 s0, s2, 16
; VI-NEXT: v_cmp_ne_u16_e64 s[0:1], s0, 0
; VI-NEXT: s_and_b64 vcc, exec, s[0:1]
; VI-NEXT: s_cbranch_vccz BB14_2
; VI-NEXT: s_cbranch_vccz .LBB14_2
; VI-NEXT: ; %bb.1: ; %else
; VI-NEXT: s_mov_b32 s11, 0xf000
; VI-NEXT: s_mov_b32 s10, -1
; VI-NEXT: s_mov_b32 s8, s6
; VI-NEXT: s_mov_b32 s9, s7
; VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 offset:2
; VI-NEXT: s_cbranch_execz BB14_3
; VI-NEXT: s_branch BB14_4
; VI-NEXT: BB14_2:
; VI-NEXT: s_cbranch_execz .LBB14_3
; VI-NEXT: s_branch .LBB14_4
; VI-NEXT: .LBB14_2:
; VI-NEXT: ; implicit-def: $vgpr0
; VI-NEXT: BB14_3: ; %if
; VI-NEXT: .LBB14_3: ; %if
; VI-NEXT: s_and_b32 s0, s2, 0xffff
; VI-NEXT: s_bcnt1_i32_b32 s0, s0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: BB14_4: ; %endif
; VI-NEXT: .LBB14_4: ; %endif
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: s_waitcnt vmcnt(0)
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/AMDGPU/dag-divergence-atomic.ll
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ define protected amdgpu_kernel void @nand(i32 addrspace(1)* %p, %S addrspace(1)*
; CHECK-NEXT: s_load_dword s6, s[0:1], 0x0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v1, s6
; CHECK-NEXT: BB5_1: ; %atomicrmw.start
; CHECK-NEXT: .LBB5_1: ; %atomicrmw.start
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: v_mov_b32_e32 v3, v1
; CHECK-NEXT: v_not_b32_e32 v1, v3
Expand All @@ -135,7 +135,7 @@ define protected amdgpu_kernel void @nand(i32 addrspace(1)* %p, %S addrspace(1)*
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
; CHECK-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
; CHECK-NEXT: s_andn2_b64 exec, exec, s[4:5]
; CHECK-NEXT: s_cbranch_execnz BB5_1
; CHECK-NEXT: s_cbranch_execnz .LBB5_1
; CHECK-NEXT: ; %bb.2: ; %atomicrmw.end
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[0:1], v1, 12, s[2:3]
Expand Down Expand Up @@ -335,7 +335,7 @@ define protected amdgpu_kernel void @fadd(float addrspace(1)* %p, %S addrspace(1
; CHECK-NEXT: s_load_dword s6, s[0:1], 0x0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v1, s6
; CHECK-NEXT: BB14_1: ; %atomicrmw.start
; CHECK-NEXT: .LBB14_1: ; %atomicrmw.start
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: v_mov_b32_e32 v3, v1
; CHECK-NEXT: v_add_f32_e32 v2, 1.0, v3
Expand All @@ -344,7 +344,7 @@ define protected amdgpu_kernel void @fadd(float addrspace(1)* %p, %S addrspace(1
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
; CHECK-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
; CHECK-NEXT: s_andn2_b64 exec, exec, s[4:5]
; CHECK-NEXT: s_cbranch_execnz BB14_1
; CHECK-NEXT: s_cbranch_execnz .LBB14_1
; CHECK-NEXT: ; %bb.2: ; %atomicrmw.end
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v1
Expand All @@ -370,7 +370,7 @@ define protected amdgpu_kernel void @fsub(float addrspace(1)* %p, %S addrspace(1
; CHECK-NEXT: s_load_dword s6, s[0:1], 0x0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: v_mov_b32_e32 v1, s6
; CHECK-NEXT: BB15_1: ; %atomicrmw.start
; CHECK-NEXT: .LBB15_1: ; %atomicrmw.start
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: v_mov_b32_e32 v3, v1
; CHECK-NEXT: v_add_f32_e32 v2, -1.0, v3
Expand All @@ -379,7 +379,7 @@ define protected amdgpu_kernel void @fsub(float addrspace(1)* %p, %S addrspace(1
; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3
; CHECK-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
; CHECK-NEXT: s_andn2_b64 exec, exec, s[4:5]
; CHECK-NEXT: s_cbranch_execnz BB15_1
; CHECK-NEXT: s_cbranch_execnz .LBB15_1
; CHECK-NEXT: ; %bb.2: ; %atomicrmw.end
; CHECK-NEXT: s_or_b64 exec, exec, s[4:5]
; CHECK-NEXT: v_cvt_u32_f32_e32 v0, v1
Expand Down
18 changes: 9 additions & 9 deletions llvm/test/CodeGen/AMDGPU/divergent-branch-uniform-condition.ll
Original file line number Diff line number Diff line change
Expand Up @@ -26,37 +26,37 @@ define amdgpu_ps void @main(i32 %0, float %1) {
; ISA-NEXT: s_mov_b64 s[0:1], 0
; ISA-NEXT: ; implicit-def: $sgpr4_sgpr5
; ISA-NEXT: ; implicit-def: $sgpr2_sgpr3
; ISA-NEXT: s_branch BB0_3
; ISA-NEXT: BB0_1: ; %Flow1
; ISA-NEXT: s_branch .LBB0_3
; ISA-NEXT: .LBB0_1: ; %Flow1
; ISA-NEXT: ; in Loop: Header=BB0_3 Depth=1
; ISA-NEXT: s_or_b64 exec, exec, s[6:7]
; ISA-NEXT: s_add_i32 s8, s8, 1
; ISA-NEXT: s_mov_b64 s[6:7], 0
; ISA-NEXT: BB0_2: ; %Flow
; ISA-NEXT: .LBB0_2: ; %Flow
; ISA-NEXT: ; in Loop: Header=BB0_3 Depth=1
; ISA-NEXT: s_and_b64 s[10:11], exec, s[4:5]
; ISA-NEXT: s_or_b64 s[0:1], s[10:11], s[0:1]
; ISA-NEXT: s_andn2_b64 s[2:3], s[2:3], exec
; ISA-NEXT: s_and_b64 s[6:7], s[6:7], exec
; ISA-NEXT: s_or_b64 s[2:3], s[2:3], s[6:7]
; ISA-NEXT: s_andn2_b64 exec, exec, s[0:1]
; ISA-NEXT: s_cbranch_execz BB0_6
; ISA-NEXT: BB0_3: ; %loop
; ISA-NEXT: s_cbranch_execz .LBB0_6
; ISA-NEXT: .LBB0_3: ; %loop
; ISA-NEXT: ; =>This Inner Loop Header: Depth=1
; ISA-NEXT: s_or_b64 s[4:5], s[4:5], exec
; ISA-NEXT: s_cmp_lt_u32 s8, 32
; ISA-NEXT: s_mov_b64 s[6:7], -1
; ISA-NEXT: s_cbranch_scc0 BB0_2
; ISA-NEXT: s_cbranch_scc0 .LBB0_2
; ISA-NEXT: ; %bb.4: ; %endif1
; ISA-NEXT: ; in Loop: Header=BB0_3 Depth=1
; ISA-NEXT: s_mov_b64 s[4:5], -1
; ISA-NEXT: s_and_saveexec_b64 s[6:7], vcc
; ISA-NEXT: s_cbranch_execz BB0_1
; ISA-NEXT: s_cbranch_execz .LBB0_1
; ISA-NEXT: ; %bb.5: ; %endif2
; ISA-NEXT: ; in Loop: Header=BB0_3 Depth=1
; ISA-NEXT: s_xor_b64 s[4:5], exec, -1
; ISA-NEXT: s_branch BB0_1
; ISA-NEXT: BB0_6: ; %Flow2
; ISA-NEXT: s_branch .LBB0_1
; ISA-NEXT: .LBB0_6: ; %Flow2
; ISA-NEXT: s_or_b64 exec, exec, s[0:1]
; ISA-NEXT: v_mov_b32_e32 v1, 0
; ISA-NEXT: s_and_saveexec_b64 s[0:1], s[2:3]
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/AMDGPU/early-if-convert.ll
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ endif:

; Short chain of cheap instructions to not convert
; GCN-LABEL: {{^}}test_vccnz_ifcvt_triangle_min_expensive:
; GCN: s_cbranch_vccnz [[ENDIF:BB[0-9]+_[0-9]+]]
; GCN: s_cbranch_vccnz [[ENDIF:.LBB[0-9]+_[0-9]+]]

; GCN: v_mul_f32
; GCN: v_mul_f32
Expand Down Expand Up @@ -155,7 +155,7 @@ endif:
; Should still branch over fdiv expansion
; GCN-LABEL: {{^}}test_vccnz_ifcvt_triangle_expensive:
; GCN: v_cmp_neq_f32_e32
; GCN: s_cbranch_vccnz [[ENDIF:BB[0-9]+_[0-9]+]]
; GCN: s_cbranch_vccnz [[ENDIF:.LBB[0-9]+_[0-9]+]]

; GCN: v_div_scale_f32

Expand All @@ -180,7 +180,7 @@ endif:
; vcc branch with SGPR inputs
; GCN-LABEL: {{^}}test_vccnz_sgpr_ifcvt_triangle:
; GCN: v_cmp_neq_f32_e64
; GCN: s_cbranch_vccnz [[ENDIF:BB[0-9]+_[0-9]+]]
; GCN: s_cbranch_vccnz [[ENDIF:.LBB[0-9]+_[0-9]+]]

; GCN: s_add_i32

Expand Down Expand Up @@ -267,7 +267,7 @@ endif:
; Scalar branch but VGPR select operands
; GCN-LABEL: {{^}}test_scc1_vgpr_ifcvt_triangle:
; GCN: s_cmp_lg_u32
; GCN: s_cbranch_scc1 [[ENDIF:BB[0-9]+_[0-9]+]]
; GCN: s_cbranch_scc1 [[ENDIF:.LBB[0-9]+_[0-9]+]]

; GCN: v_add_f32_e32

Expand Down Expand Up @@ -402,7 +402,7 @@ done:

; GCN-LABEL: {{^}}test_vccnz_ifcvt_triangle256:
; GCN: v_cmp_neq_f32
; GCN: s_cbranch_vccnz [[ENDIF:BB[0-9]+_[0-9]+]]
; GCN: s_cbranch_vccnz [[ENDIF:.LBB[0-9]+_[0-9]+]]

; GCN: v_add_i32
; GCN: v_add_i32
Expand All @@ -427,7 +427,7 @@ endif:

; GCN-LABEL: {{^}}test_vccnz_ifcvt_triangle512:
; GCN: v_cmp_neq_f32
; GCN: s_cbranch_vccnz [[ENDIF:BB[0-9]+_[0-9]+]]
; GCN: s_cbranch_vccnz [[ENDIF:.LBB[0-9]+_[0-9]+]]

; GCN: v_add_i32
; GCN: v_add_i32
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/endcf-loop-header.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
; This is was lowered from the llvm.SI.end.cf intrinsic:
; CHECK: s_or_b64 exec, exec

; CHECK: [[LOOP_LABEL:[0-9A-Za-z_]+]]: ; %loop{{$}}
; CHECK: [[LOOP_LABEL:.L[0-9A-Za-z_]+]]: ; %loop{{$}}
; CHECK-NOT: s_or_b64 exec, exec
; CHECK: s_cbranch_execnz [[LOOP_LABEL]]
define amdgpu_kernel void @test(i32 addrspace(1)* %out) {
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/AMDGPU/expand-scalar-carry-out-select-user.ll
Original file line number Diff line number Diff line change
Expand Up @@ -83,14 +83,14 @@ define amdgpu_kernel void @s_add_co_br_user(i32 %i) {
; GFX7-NEXT: s_addc_u32 s0, s2, 0
; GFX7-NEXT: v_cmp_ge_u32_e32 vcc, s0, v0
; GFX7-NEXT: s_and_b64 vcc, exec, vcc
; GFX7-NEXT: s_cbranch_vccnz BB1_2
; GFX7-NEXT: s_cbranch_vccnz .LBB1_2
; GFX7-NEXT: ; %bb.1: ; %bb0
; GFX7-NEXT: v_mov_b32_e32 v0, 0
; GFX7-NEXT: v_mov_b32_e32 v1, 0
; GFX7-NEXT: v_mov_b32_e32 v2, 9
; GFX7-NEXT: flat_store_dword v[0:1], v2
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: BB1_2: ; %bb1
; GFX7-NEXT: .LBB1_2: ; %bb1
; GFX7-NEXT: v_mov_b32_e32 v0, 0
; GFX7-NEXT: v_mov_b32_e32 v1, 0
; GFX7-NEXT: v_mov_b32_e32 v2, 10
Expand All @@ -110,14 +110,14 @@ define amdgpu_kernel void @s_add_co_br_user(i32 %i) {
; GFX9-NEXT: s_addc_u32 s0, s2, 0
; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, s0, v0
; GFX9-NEXT: s_and_b64 vcc, exec, vcc
; GFX9-NEXT: s_cbranch_vccnz BB1_2
; GFX9-NEXT: s_cbranch_vccnz .LBB1_2
; GFX9-NEXT: ; %bb.1: ; %bb0
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: v_mov_b32_e32 v2, 9
; GFX9-NEXT: global_store_dword v[0:1], v2, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: BB1_2: ; %bb1
; GFX9-NEXT: .LBB1_2: ; %bb1
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: v_mov_b32_e32 v2, 10
Expand All @@ -137,14 +137,14 @@ define amdgpu_kernel void @s_add_co_br_user(i32 %i) {
; GFX10-NEXT: s_addc_u32 s0, s0, 0
; GFX10-NEXT: v_cmp_ge_u32_e32 vcc_lo, s0, v0
; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, vcc_lo
; GFX10-NEXT: s_cbranch_vccnz BB1_2
; GFX10-NEXT: s_cbranch_vccnz .LBB1_2
; GFX10-NEXT: ; %bb.1: ; %bb0
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: v_mov_b32_e32 v2, 9
; GFX10-NEXT: global_store_dword v[0:1], v2, off
; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
; GFX10-NEXT: BB1_2: ; %bb1
; GFX10-NEXT: .LBB1_2: ; %bb1
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: v_mov_b32_e32 v2, 10
Expand Down
44 changes: 22 additions & 22 deletions llvm/test/CodeGen/AMDGPU/fp64-atomics-gfx90a.ll
Original file line number Diff line number Diff line change
Expand Up @@ -421,7 +421,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(double addrspace(1)*
; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[4:5], s[4:5] op_sel:[0,1]
; GFX90A-NEXT: BB24_1: ; %atomicrmw.start
; GFX90A-NEXT: .LBB24_1: ; %atomicrmw.start
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0
; GFX90A-NEXT: buffer_wbl2
Expand All @@ -434,7 +434,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat(double addrspace(1)*
; GFX90A-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
; GFX90A-NEXT: s_andn2_b64 exec, exec, s[2:3]
; GFX90A-NEXT: s_cbranch_execnz BB24_1
; GFX90A-NEXT: s_cbranch_execnz .LBB24_1
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX90A-NEXT: s_endpgm
main_body:
Expand Down Expand Up @@ -469,7 +469,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(double addrsp
; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[4:5], s[4:5] op_sel:[0,1]
; GFX90A-NEXT: BB26_1: ; %atomicrmw.start
; GFX90A-NEXT: .LBB26_1: ; %atomicrmw.start
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0
; GFX90A-NEXT: buffer_wbl2
Expand All @@ -482,7 +482,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_system(double addrsp
; GFX90A-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
; GFX90A-NEXT: s_andn2_b64 exec, exec, s[2:3]
; GFX90A-NEXT: s_cbranch_execnz BB26_1
; GFX90A-NEXT: s_cbranch_execnz .LBB26_1
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX90A-NEXT: s_endpgm
main_body:
Expand Down Expand Up @@ -525,7 +525,7 @@ define double @global_atomic_fadd_f64_rtn_pat(double addrspace(1)* %ptr, double
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: global_load_dwordx2 v[2:3], v[0:1], off
; GFX90A-NEXT: s_mov_b64 s[4:5], 0
; GFX90A-NEXT: BB29_1: ; %atomicrmw.start
; GFX90A-NEXT: .LBB29_1: ; %atomicrmw.start
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
Expand All @@ -539,7 +539,7 @@ define double @global_atomic_fadd_f64_rtn_pat(double addrspace(1)* %ptr, double
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
; GFX90A-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
; GFX90A-NEXT: s_andn2_b64 exec, exec, s[4:5]
; GFX90A-NEXT: s_cbranch_execnz BB29_1
; GFX90A-NEXT: s_cbranch_execnz .LBB29_1
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX90A-NEXT: v_mov_b32_e32 v0, v2
Expand Down Expand Up @@ -572,7 +572,7 @@ define double @global_atomic_fadd_f64_rtn_pat_system(double addrspace(1)* %ptr,
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: global_load_dwordx2 v[2:3], v[0:1], off
; GFX90A-NEXT: s_mov_b64 s[4:5], 0
; GFX90A-NEXT: BB31_1: ; %atomicrmw.start
; GFX90A-NEXT: .LBB31_1: ; %atomicrmw.start
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX90A-NEXT: s_waitcnt vmcnt(0)
; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
Expand All @@ -586,7 +586,7 @@ define double @global_atomic_fadd_f64_rtn_pat_system(double addrspace(1)* %ptr,
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
; GFX90A-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
; GFX90A-NEXT: s_andn2_b64 exec, exec, s[4:5]
; GFX90A-NEXT: s_cbranch_execnz BB31_1
; GFX90A-NEXT: s_cbranch_execnz .LBB31_1
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX90A-NEXT: v_mov_b32_e32 v0, v2
Expand Down Expand Up @@ -631,7 +631,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(double ad
; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NEXT: v_pk_mov_b32 v[2:3], s[4:5], s[4:5] op_sel:[0,1]
; GFX90A-NEXT: BB34_1: ; %atomicrmw.start
; GFX90A-NEXT: .LBB34_1: ; %atomicrmw.start
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
Expand All @@ -642,7 +642,7 @@ define amdgpu_kernel void @global_atomic_fadd_f64_noret_pat_agent_safe(double ad
; GFX90A-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
; GFX90A-NEXT: s_andn2_b64 exec, exec, s[2:3]
; GFX90A-NEXT: s_cbranch_execnz BB34_1
; GFX90A-NEXT: s_cbranch_execnz .LBB34_1
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX90A-NEXT: s_endpgm
main_body:
Expand All @@ -658,7 +658,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(double* %ptr) #1 {
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
; GFX90A-NEXT: flat_load_dwordx2 v[2:3], v[0:1]
; GFX90A-NEXT: BB35_1: ; %atomicrmw.start
; GFX90A-NEXT: .LBB35_1: ; %atomicrmw.start
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0
Expand All @@ -673,7 +673,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat(double* %ptr) #1 {
; GFX90A-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
; GFX90A-NEXT: s_andn2_b64 exec, exec, s[2:3]
; GFX90A-NEXT: s_cbranch_execnz BB35_1
; GFX90A-NEXT: s_cbranch_execnz .LBB35_1
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX90A-NEXT: s_endpgm
main_body:
Expand Down Expand Up @@ -707,7 +707,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(double* %ptr) #
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
; GFX90A-NEXT: flat_load_dwordx2 v[2:3], v[0:1]
; GFX90A-NEXT: BB37_1: ; %atomicrmw.start
; GFX90A-NEXT: .LBB37_1: ; %atomicrmw.start
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0
Expand All @@ -723,7 +723,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_system(double* %ptr) #
; GFX90A-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
; GFX90A-NEXT: s_andn2_b64 exec, exec, s[2:3]
; GFX90A-NEXT: s_cbranch_execnz BB37_1
; GFX90A-NEXT: s_cbranch_execnz .LBB37_1
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX90A-NEXT: s_endpgm
main_body:
Expand All @@ -737,7 +737,7 @@ define double @flat_atomic_fadd_f64_rtn_pat(double* %ptr) #1 {
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: flat_load_dwordx2 v[2:3], v[0:1]
; GFX90A-NEXT: s_mov_b64 s[4:5], 0
; GFX90A-NEXT: BB38_1: ; %atomicrmw.start
; GFX90A-NEXT: .LBB38_1: ; %atomicrmw.start
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
Expand All @@ -751,7 +751,7 @@ define double @flat_atomic_fadd_f64_rtn_pat(double* %ptr) #1 {
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
; GFX90A-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
; GFX90A-NEXT: s_andn2_b64 exec, exec, s[4:5]
; GFX90A-NEXT: s_cbranch_execnz BB38_1
; GFX90A-NEXT: s_cbranch_execnz .LBB38_1
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX90A-NEXT: v_mov_b32_e32 v0, v2
Expand Down Expand Up @@ -784,7 +784,7 @@ define double @flat_atomic_fadd_f64_rtn_pat_system(double* %ptr) #1 {
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: flat_load_dwordx2 v[2:3], v[0:1]
; GFX90A-NEXT: s_mov_b64 s[4:5], 0
; GFX90A-NEXT: BB40_1: ; %atomicrmw.start
; GFX90A-NEXT: .LBB40_1: ; %atomicrmw.start
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_pk_mov_b32 v[4:5], v[2:3], v[2:3] op_sel:[0,1]
Expand All @@ -799,7 +799,7 @@ define double @flat_atomic_fadd_f64_rtn_pat_system(double* %ptr) #1 {
; GFX90A-NEXT: v_cmp_eq_u64_e32 vcc, v[2:3], v[4:5]
; GFX90A-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
; GFX90A-NEXT: s_andn2_b64 exec, exec, s[4:5]
; GFX90A-NEXT: s_cbranch_execnz BB40_1
; GFX90A-NEXT: s_cbranch_execnz .LBB40_1
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5]
; GFX90A-NEXT: v_mov_b32_e32 v0, v2
Expand Down Expand Up @@ -846,7 +846,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent_safe(double* %pt
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], s[0:1], s[0:1] op_sel:[0,1]
; GFX90A-NEXT: flat_load_dwordx2 v[2:3], v[0:1]
; GFX90A-NEXT: BB43_1: ; %atomicrmw.start
; GFX90A-NEXT: .LBB43_1: ; %atomicrmw.start
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX90A-NEXT: v_add_f64 v[0:1], v[2:3], 4.0
Expand All @@ -859,7 +859,7 @@ define amdgpu_kernel void @flat_atomic_fadd_f64_noret_pat_agent_safe(double* %pt
; GFX90A-NEXT: s_or_b64 s[2:3], vcc, s[2:3]
; GFX90A-NEXT: v_pk_mov_b32 v[2:3], v[0:1], v[0:1] op_sel:[0,1]
; GFX90A-NEXT: s_andn2_b64 exec, exec, s[2:3]
; GFX90A-NEXT: s_cbranch_execnz BB43_1
; GFX90A-NEXT: s_cbranch_execnz .LBB43_1
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX90A-NEXT: s_endpgm
main_body:
Expand Down Expand Up @@ -994,7 +994,7 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(double add
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NEXT: v_mov_b32_e32 v0, s2
; GFX90A-NEXT: ds_read_b64 v[0:1], v0
; GFX90A-NEXT: BB52_1: ; %atomicrmw.start
; GFX90A-NEXT: .LBB52_1: ; %atomicrmw.start
; GFX90A-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NEXT: v_add_f64 v[2:3], v[0:1], 4.0
Expand All @@ -1006,7 +1006,7 @@ define amdgpu_kernel void @local_atomic_fadd_f64_noret_pat_flush_safe(double add
; GFX90A-NEXT: s_or_b64 s[0:1], vcc, s[0:1]
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], v[2:3], v[2:3] op_sel:[0,1]
; GFX90A-NEXT: s_andn2_b64 exec, exec, s[0:1]
; GFX90A-NEXT: s_cbranch_execnz BB52_1
; GFX90A-NEXT: s_cbranch_execnz .LBB52_1
; GFX90A-NEXT: ; %bb.2: ; %atomicrmw.end
; GFX90A-NEXT: s_endpgm
main_body:
Expand Down
100 changes: 50 additions & 50 deletions llvm/test/CodeGen/AMDGPU/global-atomics-fp.ll

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/global-load-saddr-to-vaddr.ll
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,15 @@ define amdgpu_kernel void @test_move_load_address_to_vgpr(i32 addrspace(1)* noca
; GCN-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
; GCN-NEXT: v_add_co_u32_e32 v0, vcc, s0, v0
; GCN-NEXT: v_addc_co_u32_e32 v1, vcc, v3, v1, vcc
; GCN-NEXT: BB0_1: ; %bb3
; GCN-NEXT: .LBB0_1: ; %bb3
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: global_load_dword v3, v[0:1], off glc
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_add_co_u32_e32 v2, vcc, 1, v2
; GCN-NEXT: v_add_co_u32_e64 v0, s[0:1], 4, v0
; GCN-NEXT: v_addc_co_u32_e64 v1, s[0:1], 0, v1, s[0:1]
; GCN-NEXT: s_and_b64 vcc, exec, vcc
; GCN-NEXT: s_cbranch_vccz BB0_1
; GCN-NEXT: s_cbranch_vccz .LBB0_1
; GCN-NEXT: ; %bb.2: ; %bb2
; GCN-NEXT: s_endpgm
bb:
Expand Down Expand Up @@ -61,7 +61,7 @@ define amdgpu_kernel void @test_move_load_address_to_vgpr_d16_hi(i16 addrspace(1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: global_load_ushort v0, v1, s[0:1] glc
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: BB1_1: ; %bb3
; GCN-NEXT: .LBB1_1: ; %bb3
; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_lshlrev_b64 v[2:3], 1, v[0:1]
Expand All @@ -72,7 +72,7 @@ define amdgpu_kernel void @test_move_load_address_to_vgpr_d16_hi(i16 addrspace(1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, s2, v0
; GCN-NEXT: s_and_b64 vcc, exec, vcc
; GCN-NEXT: s_cbranch_vccz BB1_1
; GCN-NEXT: s_cbranch_vccz .LBB1_1
; GCN-NEXT: ; %bb.2: ; %bb2
; GCN-NEXT: s_endpgm
bb:
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/AMDGPU/global-saddr-load.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2389,7 +2389,7 @@ define amdgpu_ps void @global_addr_64bit_lsr_iv(float addrspace(1)* inreg %arg)
; GFX9: ; %bb.0: ; %bb
; GFX9-NEXT: s_mov_b64 s[0:1], 0
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: BB128_1: ; %bb3
; GFX9-NEXT: .LBB128_1: ; %bb3
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-NEXT: s_add_u32 s4, s2, s0
; GFX9-NEXT: s_addc_u32 s5, s3, s1
Expand All @@ -2398,15 +2398,15 @@ define amdgpu_ps void @global_addr_64bit_lsr_iv(float addrspace(1)* inreg %arg)
; GFX9-NEXT: s_add_u32 s0, s0, 4
; GFX9-NEXT: s_addc_u32 s1, s1, 0
; GFX9-NEXT: s_cmpk_eq_i32 s0, 0x400
; GFX9-NEXT: s_cbranch_scc0 BB128_1
; GFX9-NEXT: s_cbranch_scc0 .LBB128_1
; GFX9-NEXT: ; %bb.2: ; %bb2
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: global_addr_64bit_lsr_iv:
; GFX10: ; %bb.0: ; %bb
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_mov_b64 s[0:1], 0
; GFX10-NEXT: BB128_1: ; %bb3
; GFX10-NEXT: .LBB128_1: ; %bb3
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_add_u32 s4, s2, s0
Expand All @@ -2416,7 +2416,7 @@ define amdgpu_ps void @global_addr_64bit_lsr_iv(float addrspace(1)* inreg %arg)
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_addc_u32 s1, s1, 0
; GFX10-NEXT: s_cmpk_eq_i32 s0, 0x400
; GFX10-NEXT: s_cbranch_scc0 BB128_1
; GFX10-NEXT: s_cbranch_scc0 .LBB128_1
; GFX10-NEXT: ; %bb.2: ; %bb2
; GFX10-NEXT: s_endpgm
bb:
Expand All @@ -2442,7 +2442,7 @@ define amdgpu_ps void @global_addr_64bit_lsr_iv_multiload(float addrspace(1)* in
; GFX9: ; %bb.0: ; %bb
; GFX9-NEXT: s_mov_b64 s[0:1], 0
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: BB129_1: ; %bb3
; GFX9-NEXT: .LBB129_1: ; %bb3
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-NEXT: s_add_u32 s4, s2, s0
; GFX9-NEXT: s_addc_u32 s5, s3, s1
Expand All @@ -2454,15 +2454,15 @@ define amdgpu_ps void @global_addr_64bit_lsr_iv_multiload(float addrspace(1)* in
; GFX9-NEXT: s_addc_u32 s1, s1, 0
; GFX9-NEXT: s_cmpk_eq_i32 s0, 0x400
; GFX9-NEXT: ; kill: killed $sgpr4 killed $sgpr5
; GFX9-NEXT: s_cbranch_scc0 BB129_1
; GFX9-NEXT: s_cbranch_scc0 .LBB129_1
; GFX9-NEXT: ; %bb.2: ; %bb2
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: global_addr_64bit_lsr_iv_multiload:
; GFX10: ; %bb.0: ; %bb
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_mov_b64 s[0:1], 0
; GFX10-NEXT: BB129_1: ; %bb3
; GFX10-NEXT: .LBB129_1: ; %bb3
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_add_u32 s4, s2, s0
Expand All @@ -2475,7 +2475,7 @@ define amdgpu_ps void @global_addr_64bit_lsr_iv_multiload(float addrspace(1)* in
; GFX10-NEXT: s_addc_u32 s1, s1, 0
; GFX10-NEXT: s_cmpk_eq_i32 s0, 0x400
; GFX10-NEXT: ; kill: killed $sgpr4 killed $sgpr5
; GFX10-NEXT: s_cbranch_scc0 BB129_1
; GFX10-NEXT: s_cbranch_scc0 .LBB129_1
; GFX10-NEXT: ; %bb.2: ; %bb2
; GFX10-NEXT: s_endpgm
bb:
Expand Down
20 changes: 10 additions & 10 deletions llvm/test/CodeGen/AMDGPU/i1-copy-from-loop.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,25 +8,25 @@ define amdgpu_ps void @i1_copy_from_loop(<4 x i32> inreg %rsrc, i32 %tid) {
; SI-NEXT: s_mov_b64 s[4:5], 0
; SI-NEXT: ; implicit-def: $sgpr6_sgpr7
; SI-NEXT: ; implicit-def: $sgpr8_sgpr9
; SI-NEXT: s_branch BB0_3
; SI-NEXT: BB0_1: ; in Loop: Header=BB0_3 Depth=1
; SI-NEXT: s_branch .LBB0_3
; SI-NEXT: .LBB0_1: ; in Loop: Header=BB0_3 Depth=1
; SI-NEXT: ; implicit-def: $sgpr14
; SI-NEXT: BB0_2: ; %Flow
; SI-NEXT: .LBB0_2: ; %Flow
; SI-NEXT: ; in Loop: Header=BB0_3 Depth=1
; SI-NEXT: s_and_b64 s[12:13], exec, s[8:9]
; SI-NEXT: s_or_b64 s[4:5], s[12:13], s[4:5]
; SI-NEXT: s_andn2_b64 s[6:7], s[6:7], exec
; SI-NEXT: s_and_b64 s[10:11], s[10:11], exec
; SI-NEXT: s_or_b64 s[6:7], s[6:7], s[10:11]
; SI-NEXT: s_andn2_b64 exec, exec, s[4:5]
; SI-NEXT: s_cbranch_execz BB0_7
; SI-NEXT: BB0_3: ; %for.body
; SI-NEXT: s_cbranch_execz .LBB0_7
; SI-NEXT: .LBB0_3: ; %for.body
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: s_cmp_lt_u32 s14, 4
; SI-NEXT: s_cselect_b64 s[10:11], -1, 0
; SI-NEXT: s_or_b64 s[8:9], s[8:9], exec
; SI-NEXT: s_cmp_gt_u32 s14, 3
; SI-NEXT: s_cbranch_scc1 BB0_1
; SI-NEXT: s_cbranch_scc1 .LBB0_1
; SI-NEXT: ; %bb.4: ; %mid.loop
; SI-NEXT: ; in Loop: Header=BB0_3 Depth=1
; SI-NEXT: v_mov_b32_e32 v1, s14
Expand All @@ -43,14 +43,14 @@ define amdgpu_ps void @i1_copy_from_loop(<4 x i32> inreg %rsrc, i32 %tid) {
; SI-NEXT: ; %bb.6: ; %Flow1
; SI-NEXT: ; in Loop: Header=BB0_3 Depth=1
; SI-NEXT: s_or_b64 exec, exec, s[12:13]
; SI-NEXT: s_branch BB0_2
; SI-NEXT: BB0_7: ; %for.end
; SI-NEXT: s_branch .LBB0_2
; SI-NEXT: .LBB0_7: ; %for.end
; SI-NEXT: s_or_b64 exec, exec, s[4:5]
; SI-NEXT: s_and_saveexec_b64 s[0:1], s[6:7]
; SI-NEXT: s_cbranch_execz BB0_9
; SI-NEXT: s_cbranch_execz .LBB0_9
; SI-NEXT: ; %bb.8: ; %if
; SI-NEXT: exp mrt0 v0, v0, v0, v0 done vm
; SI-NEXT: BB0_9: ; %end
; SI-NEXT: .LBB0_9: ; %end
; SI-NEXT: s_endpgm
entry:
br label %for.body
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/i1-copy-phi-uniform-branch.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@

; GCN: ; %entry
; GCN: s_cmp_eq_u32 s0, 0
; GCN: s_cbranch_scc1 [[EXIT:BB[0-9_]+]]
; GCN: s_cbranch_scc1 [[EXIT:.LBB[0-9_]+]]

; GCN: ; %blocka
; GCN: s_cmp_eq_u32 s1, 0
; GCN: s_cbranch_scc1 [[PREEXIT:BB[0-9_]+]]
; GCN: s_cbranch_scc1 [[PREEXIT:.LBB[0-9_]+]]

; GCN: [[PREEXIT]]:
; GCN: [[EXIT]]:
Expand Down
64 changes: 32 additions & 32 deletions llvm/test/CodeGen/AMDGPU/idiv-licm.ll
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ define amdgpu_kernel void @udiv32_invariant_denom(i32 addrspace(1)* nocapture %a
; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1
; GFX9-NEXT: v_add_u32_e32 v0, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: BB0_1: ; %bb3
; GFX9-NEXT: .LBB0_1: ; %bb3
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-NEXT: v_mul_lo_u32 v2, s3, v0
; GFX9-NEXT: v_mul_hi_u32 v3, s2, v0
Expand All @@ -41,7 +41,7 @@ define amdgpu_kernel void @udiv32_invariant_denom(i32 addrspace(1)* nocapture %a
; GFX9-NEXT: s_add_u32 s0, s0, 4
; GFX9-NEXT: s_addc_u32 s1, s1, 0
; GFX9-NEXT: s_cmpk_eq_i32 s2, 0x400
; GFX9-NEXT: s_cbranch_scc0 BB0_1
; GFX9-NEXT: s_cbranch_scc0 .LBB0_1
; GFX9-NEXT: ; %bb.2: ; %bb2
; GFX9-NEXT: s_endpgm
;
Expand All @@ -60,7 +60,7 @@ define amdgpu_kernel void @udiv32_invariant_denom(i32 addrspace(1)* nocapture %a
; GFX10-NEXT: v_mul_hi_u32 v1, v0, v1
; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: BB0_1: ; %bb3
; GFX10-NEXT: .LBB0_1: ; %bb3
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT: v_mul_lo_u32 v2, s3, v0
; GFX10-NEXT: v_mul_hi_u32 v3, s2, v0
Expand All @@ -84,7 +84,7 @@ define amdgpu_kernel void @udiv32_invariant_denom(i32 addrspace(1)* nocapture %a
; GFX10-NEXT: s_add_u32 s0, s0, 4
; GFX10-NEXT: s_addc_u32 s1, s1, 0
; GFX10-NEXT: s_cmpk_eq_i32 s2, 0x400
; GFX10-NEXT: s_cbranch_scc0 BB0_1
; GFX10-NEXT: s_cbranch_scc0 .LBB0_1
; GFX10-NEXT: ; %bb.2: ; %bb2
; GFX10-NEXT: s_endpgm
bb:
Expand Down Expand Up @@ -120,7 +120,7 @@ define amdgpu_kernel void @urem32_invariant_denom(i32 addrspace(1)* nocapture %a
; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1
; GFX9-NEXT: v_add_u32_e32 v0, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: BB1_1: ; %bb3
; GFX9-NEXT: .LBB1_1: ; %bb3
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-NEXT: v_mul_lo_u32 v2, s3, v0
; GFX9-NEXT: v_mul_hi_u32 v3, s2, v0
Expand All @@ -141,7 +141,7 @@ define amdgpu_kernel void @urem32_invariant_denom(i32 addrspace(1)* nocapture %a
; GFX9-NEXT: s_add_u32 s0, s0, 4
; GFX9-NEXT: s_addc_u32 s1, s1, 0
; GFX9-NEXT: s_cmpk_eq_i32 s2, 0x400
; GFX9-NEXT: s_cbranch_scc0 BB1_1
; GFX9-NEXT: s_cbranch_scc0 .LBB1_1
; GFX9-NEXT: ; %bb.2: ; %bb2
; GFX9-NEXT: s_endpgm
;
Expand All @@ -160,7 +160,7 @@ define amdgpu_kernel void @urem32_invariant_denom(i32 addrspace(1)* nocapture %a
; GFX10-NEXT: v_mul_hi_u32 v1, v0, v1
; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: BB1_1: ; %bb3
; GFX10-NEXT: .LBB1_1: ; %bb3
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT: v_mul_lo_u32 v2, s3, v0
; GFX10-NEXT: v_mul_hi_u32 v3, s2, v0
Expand All @@ -182,7 +182,7 @@ define amdgpu_kernel void @urem32_invariant_denom(i32 addrspace(1)* nocapture %a
; GFX10-NEXT: s_add_u32 s0, s0, 4
; GFX10-NEXT: s_addc_u32 s1, s1, 0
; GFX10-NEXT: s_cmpk_eq_i32 s2, 0x400
; GFX10-NEXT: s_cbranch_scc0 BB1_1
; GFX10-NEXT: s_cbranch_scc0 .LBB1_1
; GFX10-NEXT: ; %bb.2: ; %bb2
; GFX10-NEXT: s_endpgm
bb:
Expand Down Expand Up @@ -221,7 +221,7 @@ define amdgpu_kernel void @sdiv32_invariant_denom(i32 addrspace(1)* nocapture %a
; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1
; GFX9-NEXT: v_add_u32_e32 v0, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: BB2_1: ; %bb3
; GFX9-NEXT: .LBB2_1: ; %bb3
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-NEXT: v_mul_hi_u32 v2, s4, v0
; GFX9-NEXT: v_mul_lo_u32 v3, v2, s3
Expand All @@ -242,7 +242,7 @@ define amdgpu_kernel void @sdiv32_invariant_denom(i32 addrspace(1)* nocapture %a
; GFX9-NEXT: s_add_u32 s0, s0, 4
; GFX9-NEXT: s_addc_u32 s1, s1, 0
; GFX9-NEXT: s_cmpk_eq_i32 s4, 0x400
; GFX9-NEXT: s_cbranch_scc0 BB2_1
; GFX9-NEXT: s_cbranch_scc0 .LBB2_1
; GFX9-NEXT: ; %bb.2: ; %bb2
; GFX9-NEXT: s_endpgm
;
Expand All @@ -264,7 +264,7 @@ define amdgpu_kernel void @sdiv32_invariant_denom(i32 addrspace(1)* nocapture %a
; GFX10-NEXT: v_mul_hi_u32 v1, v0, v1
; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: BB2_1: ; %bb3
; GFX10-NEXT: .LBB2_1: ; %bb3
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT: v_mul_hi_u32 v2, s4, v0
; GFX10-NEXT: v_mul_lo_u32 v3, v2, s3
Expand All @@ -286,7 +286,7 @@ define amdgpu_kernel void @sdiv32_invariant_denom(i32 addrspace(1)* nocapture %a
; GFX10-NEXT: s_add_u32 s0, s0, 4
; GFX10-NEXT: s_addc_u32 s1, s1, 0
; GFX10-NEXT: s_cmpk_eq_i32 s4, 0x400
; GFX10-NEXT: s_cbranch_scc0 BB2_1
; GFX10-NEXT: s_cbranch_scc0 .LBB2_1
; GFX10-NEXT: ; %bb.2: ; %bb2
; GFX10-NEXT: s_endpgm
bb:
Expand Down Expand Up @@ -325,7 +325,7 @@ define amdgpu_kernel void @srem32_invariant_denom(i32 addrspace(1)* nocapture %a
; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1
; GFX9-NEXT: v_add_u32_e32 v0, v0, v1
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: BB3_1: ; %bb3
; GFX9-NEXT: .LBB3_1: ; %bb3
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-NEXT: v_mul_hi_u32 v2, s3, v0
; GFX9-NEXT: v_mul_lo_u32 v2, v2, s2
Expand All @@ -342,7 +342,7 @@ define amdgpu_kernel void @srem32_invariant_denom(i32 addrspace(1)* nocapture %a
; GFX9-NEXT: s_add_u32 s0, s0, 4
; GFX9-NEXT: s_addc_u32 s1, s1, 0
; GFX9-NEXT: s_cmpk_eq_i32 s3, 0x400
; GFX9-NEXT: s_cbranch_scc0 BB3_1
; GFX9-NEXT: s_cbranch_scc0 .LBB3_1
; GFX9-NEXT: ; %bb.2: ; %bb2
; GFX9-NEXT: s_endpgm
;
Expand All @@ -364,7 +364,7 @@ define amdgpu_kernel void @srem32_invariant_denom(i32 addrspace(1)* nocapture %a
; GFX10-NEXT: v_mul_hi_u32 v1, v0, v1
; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: BB3_1: ; %bb3
; GFX10-NEXT: .LBB3_1: ; %bb3
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT: v_mul_hi_u32 v2, s3, v0
; GFX10-NEXT: v_mul_lo_u32 v2, v2, s2
Expand All @@ -382,7 +382,7 @@ define amdgpu_kernel void @srem32_invariant_denom(i32 addrspace(1)* nocapture %a
; GFX10-NEXT: s_add_u32 s0, s0, 4
; GFX10-NEXT: s_addc_u32 s1, s1, 0
; GFX10-NEXT: s_cmpk_eq_i32 s3, 0x400
; GFX10-NEXT: s_cbranch_scc0 BB3_1
; GFX10-NEXT: s_cbranch_scc0 .LBB3_1
; GFX10-NEXT: ; %bb.2: ; %bb2
; GFX10-NEXT: s_endpgm
bb:
Expand Down Expand Up @@ -415,7 +415,7 @@ define amdgpu_kernel void @udiv16_invariant_denom(i16 addrspace(1)* nocapture %a
; GFX9-NEXT: v_cvt_f32_u32_e32 v2, s2
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX9-NEXT: v_rcp_iflag_f32_e32 v3, v2
; GFX9-NEXT: BB4_1: ; %bb3
; GFX9-NEXT: .LBB4_1: ; %bb3
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-NEXT: v_and_b32_e32 v0, s4, v4
; GFX9-NEXT: v_cvt_f32_u32_e32 v8, v0
Expand All @@ -434,7 +434,7 @@ define amdgpu_kernel void @udiv16_invariant_denom(i16 addrspace(1)* nocapture %a
; GFX9-NEXT: s_and_b64 vcc, exec, vcc
; GFX9-NEXT: v_addc_co_u32_e64 v0, s[0:1], 0, v7, s[0:1]
; GFX9-NEXT: global_store_short v[5:6], v0, off
; GFX9-NEXT: s_cbranch_vccz BB4_1
; GFX9-NEXT: s_cbranch_vccz .LBB4_1
; GFX9-NEXT: ; %bb.2: ; %bb2
; GFX9-NEXT: s_endpgm
;
Expand All @@ -450,7 +450,7 @@ define amdgpu_kernel void @udiv16_invariant_denom(i16 addrspace(1)* nocapture %a
; GFX10-NEXT: s_and_b32 s0, s1, s4
; GFX10-NEXT: v_cvt_f32_u32_e32 v2, s0
; GFX10-NEXT: v_rcp_iflag_f32_e32 v3, v2
; GFX10-NEXT: BB4_1: ; %bb3
; GFX10-NEXT: .LBB4_1: ; %bb3
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT: v_and_b32_e32 v0, s1, v4
; GFX10-NEXT: v_add_nc_u16 v4, v4, 1
Expand All @@ -467,7 +467,7 @@ define amdgpu_kernel void @udiv16_invariant_denom(i16 addrspace(1)* nocapture %a
; GFX10-NEXT: v_cmp_ge_f32_e64 s0, |v7|, v2
; GFX10-NEXT: v_add_co_ci_u32_e64 v0, s0, 0, v0, s0
; GFX10-NEXT: global_store_short v[5:6], v0, off
; GFX10-NEXT: s_cbranch_vccz BB4_1
; GFX10-NEXT: s_cbranch_vccz .LBB4_1
; GFX10-NEXT: ; %bb.2: ; %bb2
; GFX10-NEXT: s_endpgm
bb:
Expand Down Expand Up @@ -500,7 +500,7 @@ define amdgpu_kernel void @urem16_invariant_denom(i16 addrspace(1)* nocapture %a
; GFX9-NEXT: v_cvt_f32_u32_e32 v2, s7
; GFX9-NEXT: v_mov_b32_e32 v4, 0
; GFX9-NEXT: v_rcp_iflag_f32_e32 v3, v2
; GFX9-NEXT: BB5_1: ; %bb3
; GFX9-NEXT: .LBB5_1: ; %bb3
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-NEXT: v_and_b32_e32 v0, s6, v4
; GFX9-NEXT: v_cvt_f32_u32_e32 v8, v0
Expand All @@ -520,7 +520,7 @@ define amdgpu_kernel void @urem16_invariant_denom(i16 addrspace(1)* nocapture %a
; GFX9-NEXT: v_addc_co_u32_e64 v6, s[0:1], v7, v6, s[0:1]
; GFX9-NEXT: v_sub_u32_e32 v0, v0, v8
; GFX9-NEXT: global_store_short v[5:6], v0, off
; GFX9-NEXT: s_cbranch_vccz BB5_1
; GFX9-NEXT: s_cbranch_vccz .LBB5_1
; GFX9-NEXT: ; %bb.2: ; %bb2
; GFX9-NEXT: s_endpgm
;
Expand All @@ -536,7 +536,7 @@ define amdgpu_kernel void @urem16_invariant_denom(i16 addrspace(1)* nocapture %a
; GFX10-NEXT: s_and_b32 s4, s1, s4
; GFX10-NEXT: v_cvt_f32_u32_e32 v2, s4
; GFX10-NEXT: v_rcp_iflag_f32_e32 v3, v2
; GFX10-NEXT: BB5_1: ; %bb3
; GFX10-NEXT: .LBB5_1: ; %bb3
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT: v_and_b32_e32 v0, s1, v4
; GFX10-NEXT: v_add_nc_u16 v4, v4, 1
Expand All @@ -555,7 +555,7 @@ define amdgpu_kernel void @urem16_invariant_denom(i16 addrspace(1)* nocapture %a
; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, vcc_lo
; GFX10-NEXT: v_sub_nc_u32_e32 v0, v0, v7
; GFX10-NEXT: global_store_short v[5:6], v0, off
; GFX10-NEXT: s_cbranch_vccz BB5_1
; GFX10-NEXT: s_cbranch_vccz .LBB5_1
; GFX10-NEXT: ; %bb.2: ; %bb2
; GFX10-NEXT: s_endpgm
bb:
Expand Down Expand Up @@ -587,7 +587,7 @@ define amdgpu_kernel void @sdiv16_invariant_denom(i16 addrspace(1)* nocapture %a
; GFX9-NEXT: v_cvt_f32_i32_e32 v2, s4
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX9-NEXT: v_rcp_iflag_f32_e32 v3, v2
; GFX9-NEXT: BB6_1: ; %bb3
; GFX9-NEXT: .LBB6_1: ; %bb3
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-NEXT: v_bfe_i32 v5, v4, 0, 16
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v4
Expand All @@ -611,7 +611,7 @@ define amdgpu_kernel void @sdiv16_invariant_denom(i16 addrspace(1)* nocapture %a
; GFX9-NEXT: s_and_b64 vcc, exec, vcc
; GFX9-NEXT: v_add_u32_e32 v0, v8, v0
; GFX9-NEXT: global_store_short v[5:6], v0, off
; GFX9-NEXT: s_cbranch_vccz BB6_1
; GFX9-NEXT: s_cbranch_vccz .LBB6_1
; GFX9-NEXT: ; %bb.2: ; %bb2
; GFX9-NEXT: s_endpgm
;
Expand All @@ -626,7 +626,7 @@ define amdgpu_kernel void @sdiv16_invariant_denom(i16 addrspace(1)* nocapture %a
; GFX10-NEXT: s_sext_i32_i16 s4, s4
; GFX10-NEXT: v_cvt_f32_i32_e32 v2, s4
; GFX10-NEXT: v_rcp_iflag_f32_e32 v3, v2
; GFX10-NEXT: BB6_1: ; %bb3
; GFX10-NEXT: .LBB6_1: ; %bb3
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT: v_bfe_i32 v5, v4, 0, 16
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v4
Expand All @@ -648,7 +648,7 @@ define amdgpu_kernel void @sdiv16_invariant_denom(i16 addrspace(1)* nocapture %a
; GFX10-NEXT: v_cndmask_b32_e64 v7, 0, v8, s1
; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v7
; GFX10-NEXT: global_store_short v[5:6], v0, off
; GFX10-NEXT: s_cbranch_vccz BB6_1
; GFX10-NEXT: s_cbranch_vccz .LBB6_1
; GFX10-NEXT: ; %bb.2: ; %bb2
; GFX10-NEXT: s_endpgm
bb:
Expand Down Expand Up @@ -680,7 +680,7 @@ define amdgpu_kernel void @srem16_invariant_denom(i16 addrspace(1)* nocapture %a
; GFX9-NEXT: s_sext_i32_i16 s6, s2
; GFX9-NEXT: v_cvt_f32_i32_e32 v2, s6
; GFX9-NEXT: v_rcp_iflag_f32_e32 v3, v2
; GFX9-NEXT: BB7_1: ; %bb3
; GFX9-NEXT: .LBB7_1: ; %bb3
; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX9-NEXT: v_bfe_i32 v7, v4, 0, 16
; GFX9-NEXT: v_cvt_f32_i32_e32 v10, v7
Expand All @@ -705,7 +705,7 @@ define amdgpu_kernel void @srem16_invariant_denom(i16 addrspace(1)* nocapture %a
; GFX9-NEXT: v_addc_co_u32_e64 v6, s[0:1], v8, v6, s[0:1]
; GFX9-NEXT: v_sub_u32_e32 v0, v7, v0
; GFX9-NEXT: global_store_short v[5:6], v0, off
; GFX9-NEXT: s_cbranch_vccz BB7_1
; GFX9-NEXT: s_cbranch_vccz .LBB7_1
; GFX9-NEXT: ; %bb.2: ; %bb2
; GFX9-NEXT: s_endpgm
;
Expand All @@ -720,7 +720,7 @@ define amdgpu_kernel void @srem16_invariant_denom(i16 addrspace(1)* nocapture %a
; GFX10-NEXT: s_sext_i32_i16 s1, s4
; GFX10-NEXT: v_cvt_f32_i32_e32 v2, s1
; GFX10-NEXT: v_rcp_iflag_f32_e32 v3, v2
; GFX10-NEXT: BB7_1: ; %bb3
; GFX10-NEXT: .LBB7_1: ; %bb3
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT: v_bfe_i32 v7, v4, 0, 16
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v4
Expand All @@ -744,7 +744,7 @@ define amdgpu_kernel void @srem16_invariant_denom(i16 addrspace(1)* nocapture %a
; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, vcc_lo
; GFX10-NEXT: v_sub_nc_u32_e32 v0, v7, v0
; GFX10-NEXT: global_store_short v[5:6], v0, off
; GFX10-NEXT: s_cbranch_vccz BB7_1
; GFX10-NEXT: s_cbranch_vccz .LBB7_1
; GFX10-NEXT: ; %bb.2: ; %bb2
; GFX10-NEXT: s_endpgm
bb:
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/image-sample-waterfall.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32, float, float, <

; GCN-LABEL: {{^}}water_loop_rsrc:

; GCN: [[RSRC_LOOP:[a-zA-Z0-9_]+]]: ; =>This Inner Loop Header: Depth=1
; GCN: [[RSRC_LOOP:.L[a-zA-Z0-9_]+]]: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: v_readfirstlane_b32 s[[SREG0:[0-9]+]], v[[VREG0:[0-9]+]]
; GCN-NEXT: v_readfirstlane_b32 s[[SREG1:[0-9]+]], v[[VREG1:[0-9]+]]
; GCN-NEXT: v_readfirstlane_b32 s[[SREG2:[0-9]+]], v[[VREG2:[0-9]+]]
Expand Down Expand Up @@ -37,7 +37,7 @@ main_body:

; GCN-LABEL: {{^}}water_loop_samp:

; GCN: [[SAMP_LOOP:[a-zA-Z0-9_]+]]: ; =>This Inner Loop Header: Depth=1
; GCN: [[SAMP_LOOP:.L[a-zA-Z0-9_]+]]: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: v_readfirstlane_b32 s[[SREG0:[0-9]+]], v[[VREG0:[0-9]+]]
; GCN-NEXT: v_readfirstlane_b32 s[[SREG1:[0-9]+]], v[[VREG1:[0-9]+]]
; GCN-NEXT: v_readfirstlane_b32 s[[SREG2:[0-9]+]], v[[VREG2:[0-9]+]]
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/indirect-addressing-si-noopt.ll
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,11 @@ entry:
; CHECK-LABEL: {{^}}extract_adjacent_blocks:
; CHECK: s_load_dword [[ARG:s[0-9]+]]
; CHECK: s_cmp_lg_u32
; CHECK: s_cbranch_scc1 [[BB4:BB[0-9]+_[0-9]+]]
; CHECK: s_cbranch_scc1 [[BB4:.LBB[0-9]+_[0-9]+]]

; CHECK: buffer_load_dwordx4

; CHECK: s_branch [[ENDBB:BB[0-9]+_[0-9]+]]
; CHECK: s_branch [[ENDBB:.LBB[0-9]+_[0-9]+]]

; CHECK: [[BB4]]:
; CHECK: buffer_load_dwordx4
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll
Original file line number Diff line number Diff line change
Expand Up @@ -508,12 +508,12 @@ define amdgpu_kernel void @insertelement_v16f32_or_index(<16 x float> addrspace(
; GCN-LABEL: {{^}}broken_phi_bb:
; GCN: v_mov_b32_e32 [[PHIREG:v[0-9]+]], 8

; GCN: {{BB[0-9]+_[0-9]+}}:
; GCN: [[BB2:BB[0-9]+_[0-9]+]]:
; GCN: {{.LBB[0-9]+_[0-9]+}}:
; GCN: [[BB2:.LBB[0-9]+_[0-9]+]]:
; GCN: v_cmp_le_i32_e32 vcc, s{{[0-9]+}}, [[PHIREG]]
; GCN: buffer_load_dword

; GCN: [[REGLOOP:BB[0-9]+_[0-9]+]]:
; GCN: [[REGLOOP:.LBB[0-9]+_[0-9]+]]:
; MOVREL: v_movreld_b32_e32

; IDXMODE: s_set_gpr_idx_on
Expand Down
72 changes: 36 additions & 36 deletions llvm/test/CodeGen/AMDGPU/indirect-call.ll
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,7 @@ define void @test_indirect_call_vgpr_ptr(void()* %fptr) {
; GCN-NEXT: s_mov_b64 s[38:39], s[6:7]
; GCN-NEXT: s_mov_b64 s[40:41], s[4:5]
; GCN-NEXT: s_mov_b64 s[46:47], exec
; GCN-NEXT: BB2_1: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: v_readfirstlane_b32 s16, v0
; GCN-NEXT: v_readfirstlane_b32 s17, v1
; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
Expand All @@ -440,7 +440,7 @@ define void @test_indirect_call_vgpr_ptr(void()* %fptr) {
; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1
; GCN-NEXT: ; implicit-def: $vgpr31
; GCN-NEXT: s_xor_b64 exec, exec, s[48:49]
; GCN-NEXT: s_cbranch_execnz BB2_1
; GCN-NEXT: s_cbranch_execnz .LBB2_1
; GCN-NEXT: ; %bb.2:
; GCN-NEXT: s_mov_b64 exec, s[46:47]
; GCN-NEXT: v_readlane_b32 s4, v40, 15
Expand Down Expand Up @@ -502,7 +502,7 @@ define void @test_indirect_call_vgpr_ptr(void()* %fptr) {
; GISEL-NEXT: v_writelane_b32 v40, s30, 15
; GISEL-NEXT: v_writelane_b32 v40, s31, 16
; GISEL-NEXT: s_mov_b64 s[46:47], exec
; GISEL-NEXT: BB2_1: ; =>This Inner Loop Header: Depth=1
; GISEL-NEXT: .LBB2_1: ; =>This Inner Loop Header: Depth=1
; GISEL-NEXT: v_readfirstlane_b32 s16, v0
; GISEL-NEXT: v_readfirstlane_b32 s17, v1
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
Expand All @@ -518,7 +518,7 @@ define void @test_indirect_call_vgpr_ptr(void()* %fptr) {
; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
; GISEL-NEXT: ; implicit-def: $vgpr31
; GISEL-NEXT: s_xor_b64 exec, exec, s[48:49]
; GISEL-NEXT: s_cbranch_execnz BB2_1
; GISEL-NEXT: s_cbranch_execnz .LBB2_1
; GISEL-NEXT: ; %bb.2:
; GISEL-NEXT: s_mov_b64 exec, s[46:47]
; GISEL-NEXT: v_readlane_b32 s4, v40, 15
Expand Down Expand Up @@ -585,7 +585,7 @@ define void @test_indirect_call_vgpr_ptr_arg(void(i32)* %fptr) {
; GCN-NEXT: s_mov_b64 s[40:41], s[4:5]
; GCN-NEXT: s_mov_b64 s[46:47], exec
; GCN-NEXT: v_mov_b32_e32 v2, 0x7b
; GCN-NEXT: BB3_1: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: v_readfirstlane_b32 s16, v0
; GCN-NEXT: v_readfirstlane_b32 s17, v1
; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
Expand All @@ -603,7 +603,7 @@ define void @test_indirect_call_vgpr_ptr_arg(void(i32)* %fptr) {
; GCN-NEXT: ; implicit-def: $vgpr31
; GCN-NEXT: ; implicit-def: $vgpr2
; GCN-NEXT: s_xor_b64 exec, exec, s[48:49]
; GCN-NEXT: s_cbranch_execnz BB3_1
; GCN-NEXT: s_cbranch_execnz .LBB3_1
; GCN-NEXT: ; %bb.2:
; GCN-NEXT: s_mov_b64 exec, s[46:47]
; GCN-NEXT: v_readlane_b32 s4, v40, 15
Expand Down Expand Up @@ -665,7 +665,7 @@ define void @test_indirect_call_vgpr_ptr_arg(void(i32)* %fptr) {
; GISEL-NEXT: v_writelane_b32 v40, s30, 15
; GISEL-NEXT: v_writelane_b32 v40, s31, 16
; GISEL-NEXT: s_mov_b64 s[46:47], exec
; GISEL-NEXT: BB3_1: ; =>This Inner Loop Header: Depth=1
; GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GISEL-NEXT: v_readfirstlane_b32 s16, v0
; GISEL-NEXT: v_readfirstlane_b32 s17, v1
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
Expand All @@ -682,7 +682,7 @@ define void @test_indirect_call_vgpr_ptr_arg(void(i32)* %fptr) {
; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
; GISEL-NEXT: ; implicit-def: $vgpr31
; GISEL-NEXT: s_xor_b64 exec, exec, s[48:49]
; GISEL-NEXT: s_cbranch_execnz BB3_1
; GISEL-NEXT: s_cbranch_execnz .LBB3_1
; GISEL-NEXT: ; %bb.2:
; GISEL-NEXT: s_mov_b64 exec, s[46:47]
; GISEL-NEXT: v_readlane_b32 s4, v40, 15
Expand Down Expand Up @@ -748,7 +748,7 @@ define i32 @test_indirect_call_vgpr_ptr_ret(i32()* %fptr) {
; GCN-NEXT: s_mov_b64 s[38:39], s[6:7]
; GCN-NEXT: s_mov_b64 s[40:41], s[4:5]
; GCN-NEXT: s_mov_b64 s[46:47], exec
; GCN-NEXT: BB4_1: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: v_readfirstlane_b32 s16, v0
; GCN-NEXT: v_readfirstlane_b32 s17, v1
; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
Expand All @@ -765,7 +765,7 @@ define i32 @test_indirect_call_vgpr_ptr_ret(i32()* %fptr) {
; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1
; GCN-NEXT: ; implicit-def: $vgpr31
; GCN-NEXT: s_xor_b64 exec, exec, s[48:49]
; GCN-NEXT: s_cbranch_execnz BB4_1
; GCN-NEXT: s_cbranch_execnz .LBB4_1
; GCN-NEXT: ; %bb.2:
; GCN-NEXT: s_mov_b64 exec, s[46:47]
; GCN-NEXT: v_add_i32_e32 v0, vcc, 1, v2
Expand Down Expand Up @@ -828,7 +828,7 @@ define i32 @test_indirect_call_vgpr_ptr_ret(i32()* %fptr) {
; GISEL-NEXT: v_writelane_b32 v40, s30, 15
; GISEL-NEXT: v_writelane_b32 v40, s31, 16
; GISEL-NEXT: s_mov_b64 s[46:47], exec
; GISEL-NEXT: BB4_1: ; =>This Inner Loop Header: Depth=1
; GISEL-NEXT: .LBB4_1: ; =>This Inner Loop Header: Depth=1
; GISEL-NEXT: v_readfirstlane_b32 s16, v0
; GISEL-NEXT: v_readfirstlane_b32 s17, v1
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
Expand All @@ -845,7 +845,7 @@ define i32 @test_indirect_call_vgpr_ptr_ret(i32()* %fptr) {
; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
; GISEL-NEXT: ; implicit-def: $vgpr31
; GISEL-NEXT: s_xor_b64 exec, exec, s[48:49]
; GISEL-NEXT: s_cbranch_execnz BB4_1
; GISEL-NEXT: s_cbranch_execnz .LBB4_1
; GISEL-NEXT: ; %bb.2:
; GISEL-NEXT: s_mov_b64 exec, s[46:47]
; GISEL-NEXT: v_add_i32_e32 v0, vcc, 1, v2
Expand Down Expand Up @@ -915,12 +915,12 @@ define void @test_indirect_call_vgpr_ptr_in_branch(void()* %fptr, i1 %cond) {
; GCN-NEXT: v_and_b32_e32 v2, 1, v2
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
; GCN-NEXT: s_and_saveexec_b64 s[46:47], vcc
; GCN-NEXT: s_cbranch_execz BB5_4
; GCN-NEXT: s_cbranch_execz .LBB5_4
; GCN-NEXT: ; %bb.1: ; %bb1
; GCN-NEXT: v_writelane_b32 v40, s30, 17
; GCN-NEXT: v_writelane_b32 v40, s31, 18
; GCN-NEXT: s_mov_b64 s[48:49], exec
; GCN-NEXT: BB5_2: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: .LBB5_2: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: v_readfirstlane_b32 s16, v0
; GCN-NEXT: v_readfirstlane_b32 s17, v1
; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
Expand All @@ -936,12 +936,12 @@ define void @test_indirect_call_vgpr_ptr_in_branch(void()* %fptr, i1 %cond) {
; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1
; GCN-NEXT: ; implicit-def: $vgpr31
; GCN-NEXT: s_xor_b64 exec, exec, s[50:51]
; GCN-NEXT: s_cbranch_execnz BB5_2
; GCN-NEXT: s_cbranch_execnz .LBB5_2
; GCN-NEXT: ; %bb.3:
; GCN-NEXT: s_mov_b64 exec, s[48:49]
; GCN-NEXT: v_readlane_b32 s30, v40, 17
; GCN-NEXT: v_readlane_b32 s31, v40, 18
; GCN-NEXT: BB5_4: ; %bb2
; GCN-NEXT: .LBB5_4: ; %bb2
; GCN-NEXT: s_or_b64 exec, exec, s[46:47]
; GCN-NEXT: v_readlane_b32 s51, v40, 16
; GCN-NEXT: v_readlane_b32 s50, v40, 15
Expand Down Expand Up @@ -1004,12 +1004,12 @@ define void @test_indirect_call_vgpr_ptr_in_branch(void()* %fptr, i1 %cond) {
; GISEL-NEXT: v_and_b32_e32 v2, 1, v2
; GISEL-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2
; GISEL-NEXT: s_and_saveexec_b64 s[46:47], vcc
; GISEL-NEXT: s_cbranch_execz BB5_4
; GISEL-NEXT: s_cbranch_execz .LBB5_4
; GISEL-NEXT: ; %bb.1: ; %bb1
; GISEL-NEXT: v_writelane_b32 v40, s30, 17
; GISEL-NEXT: v_writelane_b32 v40, s31, 18
; GISEL-NEXT: s_mov_b64 s[48:49], exec
; GISEL-NEXT: BB5_2: ; =>This Inner Loop Header: Depth=1
; GISEL-NEXT: .LBB5_2: ; =>This Inner Loop Header: Depth=1
; GISEL-NEXT: v_readfirstlane_b32 s16, v0
; GISEL-NEXT: v_readfirstlane_b32 s17, v1
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[16:17], v[0:1]
Expand All @@ -1025,12 +1025,12 @@ define void @test_indirect_call_vgpr_ptr_in_branch(void()* %fptr, i1 %cond) {
; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
; GISEL-NEXT: ; implicit-def: $vgpr31
; GISEL-NEXT: s_xor_b64 exec, exec, s[50:51]
; GISEL-NEXT: s_cbranch_execnz BB5_2
; GISEL-NEXT: s_cbranch_execnz .LBB5_2
; GISEL-NEXT: ; %bb.3:
; GISEL-NEXT: s_mov_b64 exec, s[48:49]
; GISEL-NEXT: v_readlane_b32 s30, v40, 17
; GISEL-NEXT: v_readlane_b32 s31, v40, 18
; GISEL-NEXT: BB5_4: ; %bb2
; GISEL-NEXT: .LBB5_4: ; %bb2
; GISEL-NEXT: s_or_b64 exec, exec, s[46:47]
; GISEL-NEXT: v_readlane_b32 s51, v40, 16
; GISEL-NEXT: v_readlane_b32 s50, v40, 15
Expand Down Expand Up @@ -1110,15 +1110,15 @@ define void @test_indirect_call_vgpr_ptr_inreg_arg(void(i32)* %fptr) {
; GCN-NEXT: s_mov_b64 s[6:7], s[30:31]
; GCN-NEXT: s_mov_b64 s[8:9], exec
; GCN-NEXT: s_movk_i32 s4, 0x7b
; GCN-NEXT: BB6_1: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: v_readfirstlane_b32 s12, v0
; GCN-NEXT: v_readfirstlane_b32 s13, v1
; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[12:13], v[0:1]
; GCN-NEXT: s_and_saveexec_b64 s[10:11], vcc
; GCN-NEXT: s_swappc_b64 s[30:31], s[12:13]
; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1
; GCN-NEXT: s_xor_b64 exec, exec, s[10:11]
; GCN-NEXT: s_cbranch_execnz BB6_1
; GCN-NEXT: s_cbranch_execnz .LBB6_1
; GCN-NEXT: ; %bb.2:
; GCN-NEXT: s_mov_b64 exec, s[8:9]
; GCN-NEXT: v_readlane_b32 s63, v40, 29
Expand Down Expand Up @@ -1201,15 +1201,15 @@ define void @test_indirect_call_vgpr_ptr_inreg_arg(void(i32)* %fptr) {
; GISEL-NEXT: s_mov_b64 s[6:7], s[30:31]
; GISEL-NEXT: s_movk_i32 s4, 0x7b
; GISEL-NEXT: s_mov_b64 s[8:9], exec
; GISEL-NEXT: BB6_1: ; =>This Inner Loop Header: Depth=1
; GISEL-NEXT: .LBB6_1: ; =>This Inner Loop Header: Depth=1
; GISEL-NEXT: v_readfirstlane_b32 s10, v0
; GISEL-NEXT: v_readfirstlane_b32 s11, v1
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[10:11], v[0:1]
; GISEL-NEXT: s_and_saveexec_b64 s[12:13], vcc
; GISEL-NEXT: s_swappc_b64 s[30:31], s[10:11]
; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
; GISEL-NEXT: s_xor_b64 exec, exec, s[12:13]
; GISEL-NEXT: s_cbranch_execnz BB6_1
; GISEL-NEXT: s_cbranch_execnz .LBB6_1
; GISEL-NEXT: ; %bb.2:
; GISEL-NEXT: s_mov_b64 exec, s[8:9]
; GISEL-NEXT: v_readlane_b32 s63, v40, 29
Expand Down Expand Up @@ -1297,7 +1297,7 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, void(i32)* %fptr)
; GCN-NEXT: s_mov_b64 s[4:5], s[30:31]
; GCN-NEXT: v_mov_b32_e32 v41, v0
; GCN-NEXT: s_mov_b64 s[6:7], exec
; GCN-NEXT: BB7_1: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: v_readfirstlane_b32 s10, v1
; GCN-NEXT: v_readfirstlane_b32 s11, v2
; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[10:11], v[1:2]
Expand All @@ -1306,7 +1306,7 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, void(i32)* %fptr)
; GCN-NEXT: s_swappc_b64 s[30:31], s[10:11]
; GCN-NEXT: ; implicit-def: $vgpr1_vgpr2
; GCN-NEXT: s_xor_b64 exec, exec, s[8:9]
; GCN-NEXT: s_cbranch_execnz BB7_1
; GCN-NEXT: s_cbranch_execnz .LBB7_1
; GCN-NEXT: ; %bb.2:
; GCN-NEXT: s_mov_b64 exec, s[6:7]
; GCN-NEXT: v_mov_b32_e32 v0, v41
Expand Down Expand Up @@ -1392,7 +1392,7 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, void(i32)* %fptr)
; GISEL-NEXT: v_mov_b32_e32 v41, v0
; GISEL-NEXT: s_mov_b64 s[4:5], s[30:31]
; GISEL-NEXT: s_mov_b64 s[6:7], exec
; GISEL-NEXT: BB7_1: ; =>This Inner Loop Header: Depth=1
; GISEL-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1
; GISEL-NEXT: v_readfirstlane_b32 s8, v1
; GISEL-NEXT: v_readfirstlane_b32 s9, v2
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2]
Expand All @@ -1401,7 +1401,7 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, void(i32)* %fptr)
; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9]
; GISEL-NEXT: ; implicit-def: $vgpr1_vgpr2
; GISEL-NEXT: s_xor_b64 exec, exec, s[10:11]
; GISEL-NEXT: s_cbranch_execnz BB7_1
; GISEL-NEXT: s_cbranch_execnz .LBB7_1
; GISEL-NEXT: ; %bb.2:
; GISEL-NEXT: s_mov_b64 exec, s[6:7]
; GISEL-NEXT: v_mov_b32_e32 v0, v41
Expand Down Expand Up @@ -1493,7 +1493,7 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, i32(i32)* %fptr)
; GCN-NEXT: v_writelane_b32 v40, s63, 29
; GCN-NEXT: s_mov_b64 s[4:5], s[30:31]
; GCN-NEXT: s_mov_b64 s[6:7], exec
; GCN-NEXT: BB8_1: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: v_readfirstlane_b32 s10, v1
; GCN-NEXT: v_readfirstlane_b32 s11, v2
; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[10:11], v[1:2]
Expand All @@ -1503,7 +1503,7 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, i32(i32)* %fptr)
; GCN-NEXT: ; implicit-def: $vgpr1_vgpr2
; GCN-NEXT: ; implicit-def: $vgpr0
; GCN-NEXT: s_xor_b64 exec, exec, s[8:9]
; GCN-NEXT: s_cbranch_execnz BB8_1
; GCN-NEXT: s_cbranch_execnz .LBB8_1
; GCN-NEXT: ; %bb.2:
; GCN-NEXT: s_mov_b64 exec, s[6:7]
; GCN-NEXT: v_mov_b32_e32 v0, v3
Expand Down Expand Up @@ -1586,7 +1586,7 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, i32(i32)* %fptr)
; GISEL-NEXT: v_writelane_b32 v40, s63, 29
; GISEL-NEXT: s_mov_b64 s[4:5], s[30:31]
; GISEL-NEXT: s_mov_b64 s[6:7], exec
; GISEL-NEXT: BB8_1: ; =>This Inner Loop Header: Depth=1
; GISEL-NEXT: .LBB8_1: ; =>This Inner Loop Header: Depth=1
; GISEL-NEXT: v_readfirstlane_b32 s8, v1
; GISEL-NEXT: v_readfirstlane_b32 s9, v2
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[1:2]
Expand All @@ -1596,7 +1596,7 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_return(i32 %i, i32(i32)* %fptr)
; GISEL-NEXT: ; implicit-def: $vgpr1_vgpr2
; GISEL-NEXT: ; implicit-def: $vgpr0
; GISEL-NEXT: s_xor_b64 exec, exec, s[10:11]
; GISEL-NEXT: s_cbranch_execnz BB8_1
; GISEL-NEXT: s_cbranch_execnz .LBB8_1
; GISEL-NEXT: ; %bb.2:
; GISEL-NEXT: s_mov_b64 exec, s[6:7]
; GISEL-NEXT: v_mov_b32_e32 v0, v3
Expand Down Expand Up @@ -1684,15 +1684,15 @@ define void @test_indirect_tail_call_vgpr_ptr(void()* %fptr) {
; GCN-NEXT: v_writelane_b32 v40, s63, 29
; GCN-NEXT: s_mov_b64 s[4:5], s[30:31]
; GCN-NEXT: s_mov_b64 s[6:7], exec
; GCN-NEXT: BB9_1: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: v_readfirstlane_b32 s10, v0
; GCN-NEXT: v_readfirstlane_b32 s11, v1
; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[10:11], v[0:1]
; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GCN-NEXT: s_swappc_b64 s[30:31], s[10:11]
; GCN-NEXT: ; implicit-def: $vgpr0_vgpr1
; GCN-NEXT: s_xor_b64 exec, exec, s[8:9]
; GCN-NEXT: s_cbranch_execnz BB9_1
; GCN-NEXT: s_cbranch_execnz .LBB9_1
; GCN-NEXT: ; %bb.2:
; GCN-NEXT: s_mov_b64 exec, s[6:7]
; GCN-NEXT: v_readlane_b32 s63, v40, 29
Expand Down Expand Up @@ -1774,15 +1774,15 @@ define void @test_indirect_tail_call_vgpr_ptr(void()* %fptr) {
; GISEL-NEXT: v_writelane_b32 v40, s63, 29
; GISEL-NEXT: s_mov_b64 s[4:5], s[30:31]
; GISEL-NEXT: s_mov_b64 s[6:7], exec
; GISEL-NEXT: BB9_1: ; =>This Inner Loop Header: Depth=1
; GISEL-NEXT: .LBB9_1: ; =>This Inner Loop Header: Depth=1
; GISEL-NEXT: v_readfirstlane_b32 s8, v0
; GISEL-NEXT: v_readfirstlane_b32 s9, v1
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[8:9], v[0:1]
; GISEL-NEXT: s_and_saveexec_b64 s[10:11], vcc
; GISEL-NEXT: s_swappc_b64 s[30:31], s[8:9]
; GISEL-NEXT: ; implicit-def: $vgpr0_vgpr1
; GISEL-NEXT: s_xor_b64 exec, exec, s[10:11]
; GISEL-NEXT: s_cbranch_execnz BB9_1
; GISEL-NEXT: s_cbranch_execnz .LBB9_1
; GISEL-NEXT: ; %bb.2:
; GISEL-NEXT: s_mov_b64 exec, s[6:7]
; GISEL-NEXT: v_readlane_b32 s63, v40, 29
Expand Down
40 changes: 20 additions & 20 deletions llvm/test/CodeGen/AMDGPU/infinite-loop.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@ define amdgpu_kernel void @infinite_loop(i32 addrspace(1)* %out) {
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, 0x3e7
; SI-NEXT: BB0_1: ; %loop
; SI-NEXT: .LBB0_1: ; %loop
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: s_branch BB0_1
; SI-NEXT: s_branch .LBB0_1
; IR-LABEL: @infinite_loop(
; IR-NEXT: entry:
; IR-NEXT: br label [[LOOP:%.*]]
Expand All @@ -35,21 +35,21 @@ define amdgpu_kernel void @infinite_loop_ret(i32 addrspace(1)* %out) {
; SI: ; %bb.0: ; %entry
; SI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc
; SI-NEXT: s_cbranch_execz BB1_3
; SI-NEXT: s_cbranch_execz .LBB1_3
; SI-NEXT: ; %bb.1: ; %loop.preheader
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, 0x3e7
; SI-NEXT: s_and_b64 vcc, exec, -1
; SI-NEXT: BB1_2: ; %loop
; SI-NEXT: .LBB1_2: ; %loop
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: s_mov_b64 vcc, vcc
; SI-NEXT: s_cbranch_vccnz BB1_2
; SI-NEXT: BB1_3: ; %UnifiedReturnBlock
; SI-NEXT: s_cbranch_vccnz .LBB1_2
; SI-NEXT: .LBB1_3: ; %UnifiedReturnBlock
; SI-NEXT: s_endpgm
; IR-LABEL: @infinite_loop_ret(
; IR-NEXT: entry:
Expand Down Expand Up @@ -79,39 +79,39 @@ define amdgpu_kernel void @infinite_loops(i32 addrspace(1)* %out) {
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT: s_mov_b64 s[2:3], -1
; SI-NEXT: s_cbranch_scc1 BB2_4
; SI-NEXT: s_cbranch_scc1 .LBB2_4
; SI-NEXT: ; %bb.1:
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, 0x378
; SI-NEXT: s_and_b64 vcc, exec, -1
; SI-NEXT: BB2_2: ; %loop2
; SI-NEXT: .LBB2_2: ; %loop2
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: s_mov_b64 vcc, vcc
; SI-NEXT: s_cbranch_vccnz BB2_2
; SI-NEXT: s_cbranch_vccnz .LBB2_2
; SI-NEXT: ; %bb.3: ; %Flow
; SI-NEXT: s_mov_b64 s[2:3], 0
; SI-NEXT: BB2_4: ; %Flow2
; SI-NEXT: .LBB2_4: ; %Flow2
; SI-NEXT: s_and_b64 vcc, exec, s[2:3]
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b64 vcc, vcc
; SI-NEXT: s_cbranch_vccz BB2_7
; SI-NEXT: s_cbranch_vccz .LBB2_7
; SI-NEXT: ; %bb.5:
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt expcnt(0)
; SI-NEXT: v_mov_b32_e32 v0, 0x3e7
; SI-NEXT: s_and_b64 vcc, exec, 0
; SI-NEXT: BB2_6: ; %loop1
; SI-NEXT: .LBB2_6: ; %loop1
; SI-NEXT: ; =>This Inner Loop Header: Depth=1
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: s_mov_b64 vcc, vcc
; SI-NEXT: s_cbranch_vccz BB2_6
; SI-NEXT: BB2_7: ; %DummyReturnBlock
; SI-NEXT: s_cbranch_vccz .LBB2_6
; SI-NEXT: .LBB2_7: ; %DummyReturnBlock
; SI-NEXT: s_endpgm
; IR-LABEL: @infinite_loops(
; IR-NEXT: entry:
Expand Down Expand Up @@ -141,18 +141,18 @@ define amdgpu_kernel void @infinite_loop_nest_ret(i32 addrspace(1)* %out) {
; SI: ; %bb.0: ; %entry
; SI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; SI-NEXT: s_and_saveexec_b64 s[2:3], vcc
; SI-NEXT: s_cbranch_execz BB3_5
; SI-NEXT: s_cbranch_execz .LBB3_5
; SI-NEXT: ; %bb.1: ; %outer_loop.preheader
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT: v_cmp_ne_u32_e64 s[0:1], 3, v0
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: v_mov_b32_e32 v0, 0x3e7
; SI-NEXT: BB3_2: ; %outer_loop
; SI-NEXT: .LBB3_2: ; %outer_loop
; SI-NEXT: ; =>This Loop Header: Depth=1
; SI-NEXT: ; Child Loop BB3_3 Depth 2
; SI-NEXT: s_mov_b64 s[2:3], 0
; SI-NEXT: BB3_3: ; %inner_loop
; SI-NEXT: .LBB3_3: ; %inner_loop
; SI-NEXT: ; Parent Loop BB3_2 Depth=1
; SI-NEXT: ; => This Inner Loop Header: Depth=2
; SI-NEXT: s_and_b64 s[8:9], exec, s[0:1]
Expand All @@ -161,13 +161,13 @@ define amdgpu_kernel void @infinite_loop_nest_ret(i32 addrspace(1)* %out) {
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: s_andn2_b64 exec, exec, s[2:3]
; SI-NEXT: s_cbranch_execnz BB3_3
; SI-NEXT: s_cbranch_execnz .LBB3_3
; SI-NEXT: ; %bb.4: ; %loop.exit.guard
; SI-NEXT: ; in Loop: Header=BB3_2 Depth=1
; SI-NEXT: s_or_b64 exec, exec, s[2:3]
; SI-NEXT: s_mov_b64 vcc, 0
; SI-NEXT: s_branch BB3_2
; SI-NEXT: BB3_5: ; %UnifiedReturnBlock
; SI-NEXT: s_branch .LBB3_2
; SI-NEXT: .LBB3_5: ; %UnifiedReturnBlock
; SI-NEXT: s_endpgm
; IR-LABEL: @infinite_loop_nest_ret(
; IR-NEXT: entry:
Expand Down
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1534,19 +1534,19 @@ define amdgpu_kernel void @insert_split_bb(<2 x i32> addrspace(1)* %out, i32 add
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_cmp_lg_u32 s6, 0
; SI-NEXT: s_cbranch_scc0 BB30_2
; SI-NEXT: s_cbranch_scc0 .LBB30_2
; SI-NEXT: ; %bb.1: ; %else
; SI-NEXT: s_load_dword s7, s[2:3], 0x1
; SI-NEXT: s_mov_b64 s[4:5], 0
; SI-NEXT: s_andn2_b64 vcc, exec, s[4:5]
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b64 vcc, vcc
; SI-NEXT: s_cbranch_vccz BB30_3
; SI-NEXT: s_branch BB30_4
; SI-NEXT: BB30_2:
; SI-NEXT: BB30_3: ; %if
; SI-NEXT: s_cbranch_vccz .LBB30_3
; SI-NEXT: s_branch .LBB30_4
; SI-NEXT: .LBB30_2:
; SI-NEXT: .LBB30_3: ; %if
; SI-NEXT: s_load_dword s7, s[2:3], 0x0
; SI-NEXT: BB30_4: ; %endif
; SI-NEXT: .LBB30_4: ; %endif
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_mov_b32_e32 v0, s6
; SI-NEXT: s_mov_b32 s3, 0x100f000
Expand All @@ -1561,16 +1561,16 @@ define amdgpu_kernel void @insert_split_bb(<2 x i32> addrspace(1)* %out, i32 add
; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_cmp_lg_u32 s6, 0
; VI-NEXT: s_cbranch_scc0 BB30_2
; VI-NEXT: s_cbranch_scc0 .LBB30_2
; VI-NEXT: ; %bb.1: ; %else
; VI-NEXT: s_load_dword s7, s[2:3], 0x4
; VI-NEXT: s_cbranch_execz BB30_3
; VI-NEXT: s_branch BB30_4
; VI-NEXT: BB30_2:
; VI-NEXT: BB30_3: ; %if
; VI-NEXT: s_cbranch_execz .LBB30_3
; VI-NEXT: s_branch .LBB30_4
; VI-NEXT: .LBB30_2:
; VI-NEXT: .LBB30_3: ; %if
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_load_dword s7, s[2:3], 0x0
; VI-NEXT: BB30_4: ; %endif
; VI-NEXT: .LBB30_4: ; %endif
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v0, s6
; VI-NEXT: s_mov_b32 s3, 0x1100f000
Expand Down
56 changes: 28 additions & 28 deletions llvm/test/CodeGen/AMDGPU/kill-infinite-loop.ll
Original file line number Diff line number Diff line change
Expand Up @@ -17,27 +17,27 @@ define amdgpu_ps void @return_void(float %0) #0 {
; CHECK-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v0
; CHECK-NEXT: s_and_saveexec_b64 s[2:3], vcc
; CHECK-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
; CHECK-NEXT: s_cbranch_execz BB0_3
; CHECK-NEXT: BB0_1: ; %loop
; CHECK-NEXT: s_cbranch_execz .LBB0_3
; CHECK-NEXT: .LBB0_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; CHECK-NEXT: s_cbranch_scc0 BB0_6
; CHECK-NEXT: s_cbranch_scc0 .LBB0_6
; CHECK-NEXT: ; %bb.2: ; %loop
; CHECK-NEXT: ; in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: s_mov_b64 exec, 0
; CHECK-NEXT: s_mov_b64 vcc, 0
; CHECK-NEXT: s_branch BB0_1
; CHECK-NEXT: BB0_3: ; %Flow1
; CHECK-NEXT: s_branch .LBB0_1
; CHECK-NEXT: .LBB0_3: ; %Flow1
; CHECK-NEXT: s_or_saveexec_b64 s[0:1], s[2:3]
; CHECK-NEXT: s_xor_b64 exec, exec, s[0:1]
; CHECK-NEXT: s_cbranch_execz BB0_5
; CHECK-NEXT: s_cbranch_execz .LBB0_5
; CHECK-NEXT: ; %bb.4: ; %end
; CHECK-NEXT: v_mov_b32_e32 v0, 1.0
; CHECK-NEXT: v_mov_b32_e32 v1, 0
; CHECK-NEXT: exp mrt0 v1, v1, v1, v0 done vm
; CHECK-NEXT: BB0_5: ; %UnifiedReturnBlock
; CHECK-NEXT: .LBB0_5: ; %UnifiedReturnBlock
; CHECK-NEXT: s_endpgm
; CHECK-NEXT: BB0_6:
; CHECK-NEXT: .LBB0_6:
; CHECK-NEXT: s_mov_b64 exec, 0
; CHECK-NEXT: exp null off, off, off, off done vm
; CHECK-NEXT: s_endpgm
Expand All @@ -62,26 +62,26 @@ define amdgpu_ps void @return_void_compr(float %0) #0 {
; CHECK-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v0
; CHECK-NEXT: s_and_saveexec_b64 s[2:3], vcc
; CHECK-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
; CHECK-NEXT: s_cbranch_execz BB1_3
; CHECK-NEXT: BB1_1: ; %loop
; CHECK-NEXT: s_cbranch_execz .LBB1_3
; CHECK-NEXT: .LBB1_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; CHECK-NEXT: s_cbranch_scc0 BB1_6
; CHECK-NEXT: s_cbranch_scc0 .LBB1_6
; CHECK-NEXT: ; %bb.2: ; %loop
; CHECK-NEXT: ; in Loop: Header=BB1_1 Depth=1
; CHECK-NEXT: s_mov_b64 exec, 0
; CHECK-NEXT: s_mov_b64 vcc, 0
; CHECK-NEXT: s_branch BB1_1
; CHECK-NEXT: BB1_3: ; %Flow1
; CHECK-NEXT: s_branch .LBB1_1
; CHECK-NEXT: .LBB1_3: ; %Flow1
; CHECK-NEXT: s_or_saveexec_b64 s[0:1], s[2:3]
; CHECK-NEXT: s_xor_b64 exec, exec, s[0:1]
; CHECK-NEXT: s_cbranch_execz BB1_5
; CHECK-NEXT: s_cbranch_execz .LBB1_5
; CHECK-NEXT: ; %bb.4: ; %end
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: exp mrt0 v0, off, v0, off done compr vm
; CHECK-NEXT: BB1_5: ; %UnifiedReturnBlock
; CHECK-NEXT: .LBB1_5: ; %UnifiedReturnBlock
; CHECK-NEXT: s_endpgm
; CHECK-NEXT: BB1_6:
; CHECK-NEXT: .LBB1_6:
; CHECK-NEXT: s_mov_b64 exec, 0
; CHECK-NEXT: exp null off, off, off, off done vm
; CHECK-NEXT: s_endpgm
Expand All @@ -103,15 +103,15 @@ define amdgpu_ps void @only_kill() #0 {
; CHECK-LABEL: only_kill:
; CHECK: ; %bb.0: ; %main_body
; CHECK-NEXT: s_mov_b64 s[0:1], exec
; CHECK-NEXT: BB2_1: ; %loop
; CHECK-NEXT: .LBB2_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; CHECK-NEXT: s_cbranch_scc0 BB2_3
; CHECK-NEXT: s_cbranch_scc0 .LBB2_3
; CHECK-NEXT: ; %bb.2: ; %loop
; CHECK-NEXT: ; in Loop: Header=BB2_1 Depth=1
; CHECK-NEXT: s_mov_b64 exec, 0
; CHECK-NEXT: s_branch BB2_1
; CHECK-NEXT: BB2_3:
; CHECK-NEXT: s_branch .LBB2_1
; CHECK-NEXT: .LBB2_3:
; CHECK-NEXT: s_mov_b64 exec, 0
; CHECK-NEXT: exp null off, off, off, off done vm
; CHECK-NEXT: s_endpgm
Expand All @@ -132,25 +132,25 @@ define amdgpu_ps float @return_nonvoid(float %0) #0 {
; CHECK-NEXT: v_cmp_ngt_f32_e32 vcc, s2, v0
; CHECK-NEXT: s_and_saveexec_b64 s[2:3], vcc
; CHECK-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
; CHECK-NEXT: s_cbranch_execz BB3_3
; CHECK-NEXT: BB3_1: ; %loop
; CHECK-NEXT: s_cbranch_execz .LBB3_3
; CHECK-NEXT: .LBB3_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; CHECK-NEXT: s_cbranch_scc0 BB3_4
; CHECK-NEXT: s_cbranch_scc0 .LBB3_4
; CHECK-NEXT: ; %bb.2: ; %loop
; CHECK-NEXT: ; in Loop: Header=BB3_1 Depth=1
; CHECK-NEXT: s_mov_b64 exec, 0
; CHECK-NEXT: s_mov_b64 vcc, exec
; CHECK-NEXT: s_cbranch_execnz BB3_1
; CHECK-NEXT: BB3_3: ; %Flow1
; CHECK-NEXT: s_cbranch_execnz .LBB3_1
; CHECK-NEXT: .LBB3_3: ; %Flow1
; CHECK-NEXT: s_or_b64 exec, exec, s[2:3]
; CHECK-NEXT: v_mov_b32_e32 v0, 0
; CHECK-NEXT: s_branch BB3_5
; CHECK-NEXT: BB3_4:
; CHECK-NEXT: s_branch .LBB3_5
; CHECK-NEXT: .LBB3_4:
; CHECK-NEXT: s_mov_b64 exec, 0
; CHECK-NEXT: exp null off, off, off, off done vm
; CHECK-NEXT: s_endpgm
; CHECK-NEXT: BB3_5:
; CHECK-NEXT: .LBB3_5:
main_body:
%cmp = fcmp olt float %0, 1.000000e+01
br i1 %cmp, label %end, label %loop
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/lds-m0-init-in-loop.ll
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,20 @@
; Make sure that m0 is not reinitialized in the loop.

; GCN-LABEL: {{^}}copy_local_to_global_loop_m0_init:
; GCN: s_cbranch_scc1 BB0_3
; GCN: s_cbranch_scc1 .LBB0_3

; Initialize in preheader
; GCN: s_mov_b32 m0, -1

; GCN: BB0_2:
; GCN: .LBB0_2:
; GCN-NOT: m0
; GCN: ds_read_b32
; GCN-NOT: m0
; GCN: buffer_store_dword

; GCN: s_cbranch_scc0 BB0_2
; GCN: s_cbranch_scc0 .LBB0_2

; GCN: BB0_3:
; GCN: .LBB0_3:
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @copy_local_to_global_loop_m0_init(i32 addrspace(1)* noalias nocapture %out, i32 addrspace(3)* noalias nocapture readonly %in, i32 %n) #0 {
bb:
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.barrier.ll
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
; NOLOOP: ds_gws_barrier v0 gds{{$}}

; LOOP: s_mov_b32 m0, 0{{$}}
; LOOP: [[LOOP:BB[0-9]+_[0-9]+]]:
; LOOP: [[LOOP:.LBB[0-9]+_[0-9]+]]:
; LOOP-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 8, 1), 0
; LOOP-NEXT: ds_gws_barrier v0 gds
; LOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.init.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
; GCN: v_mov_b32_e32 v0, [[BAR_NUM]]
; NOLOOP: ds_gws_init v0 gds{{$}}

; LOOP: [[LOOP:BB[0-9]+_[0-9]+]]:
; LOOP: [[LOOP:.LBB[0-9]+_[0-9]+]]:
; LOOP-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 8, 1), 0
; LOOP-NEXT: ds_gws_init v0 gds
; LOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
Expand All @@ -32,7 +32,7 @@ define amdgpu_kernel void @gws_init_offset0(i32 %val) #0 {


; LOOP: s_mov_b32 m0, 0{{$}}
; LOOP: [[LOOP:BB[0-9]+_[0-9]+]]:
; LOOP: [[LOOP:.LBB[0-9]+_[0-9]+]]:
; LOOP-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 8, 1), 0
; LOOP-NEXT: ds_gws_init v0 offset:63 gds
; LOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.br.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
; NOLOOP: ds_gws_sema_br v0 gds{{$}}

; LOOP: s_mov_b32 m0, 0{{$}}
; LOOP: [[LOOP:BB[0-9]+_[0-9]+]]:
; LOOP: [[LOOP:.LBB[0-9]+_[0-9]+]]:
; LOOP-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 8, 1), 0
; LOOP-NEXT: ds_gws_sema_br v0 gds
; LOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.p.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
; NOLOOP: ds_gws_sema_p gds{{$}}

; LOOP: s_mov_b32 m0, 0{{$}}
; LOOP: [[LOOP:BB[0-9]+_[0-9]+]]:
; LOOP: [[LOOP:.LBB[0-9]+_[0-9]+]]:
; LOOP-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 8, 1), 0
; LOOP-NEXT: ds_gws_sema_p gds
; LOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
; NOLOOP: ds_gws_sema_release_all gds{{$}}

; LOOP: s_mov_b32 m0, 0{{$}}
; LOOP: [[LOOP:BB[0-9]+_[0-9]+]]:
; LOOP: [[LOOP:.LBB[0-9]+_[0-9]+]]:
; LOOP-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 8, 1), 0
; LOOP-NEXT: ds_gws_sema_release_all gds
; LOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.gws.sema.v.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
; NOLOOP: ds_gws_sema_v gds{{$}}

; LOOP: s_mov_b32 m0, 0{{$}}
; LOOP: [[LOOP:BB[0-9]+_[0-9]+]]:
; LOOP: [[LOOP:.LBB[0-9]+_[0-9]+]]:
; LOOP-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_TRAPSTS, 8, 1), 0
; LOOP-NEXT: ds_gws_sema_v gds
; LOOP-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ds.ordered.swap.ll
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ define amdgpu_cs float @ds_ordered_swap(i32 addrspace(2)* inreg %gds, i32 %value
; GCN: v_cmp_ne_u32_e32 vcc, 0, v[[VALUE:[0-9]+]]
; GCN: s_and_saveexec_b64 s[[SAVED:\[[0-9]+:[0-9]+\]]], vcc
; // We have to use s_cbranch, because ds_ordered_count has side effects with EXEC=0
; GCN: s_cbranch_execz [[BB:BB._.]]
; GCN: s_cbranch_execz [[BB:.LBB._.]]
; GCN: s_mov_b32 m0, s0
; VIGFX9-NEXT: s_nop 0
; GCN-NEXT: ds_ordered_count v{{[0-9]+}}, v[[VALUE]] offset:4868 gds
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.set.inactive.ll
Original file line number Diff line number Diff line change
Expand Up @@ -58,19 +58,19 @@ define amdgpu_kernel void @set_inactive_scc(i32 addrspace(1)* %out, i32 %in, <4
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_cmp_lg_u32 s0, 56
; GCN-NEXT: s_mov_b64 s[0:1], -1
; GCN-NEXT: s_cbranch_scc1 BB2_3
; GCN-NEXT: s_cbranch_scc1 .LBB2_3
; GCN-NEXT: ; %bb.1: ; %Flow
; GCN-NEXT: s_andn2_b64 vcc, exec, s[0:1]
; GCN-NEXT: s_cbranch_vccz BB2_4
; GCN-NEXT: BB2_2: ; %.exit
; GCN-NEXT: s_cbranch_vccz .LBB2_4
; GCN-NEXT: .LBB2_2: ; %.exit
; GCN-NEXT: s_endpgm
; GCN-NEXT: BB2_3: ; %.one
; GCN-NEXT: .LBB2_3: ; %.one
; GCN-NEXT: v_add_u32_e32 v1, vcc, 1, v0
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, -1
; GCN-NEXT: buffer_store_dword v1, off, s[4:7], 0
; GCN-NEXT: s_cbranch_execnz BB2_2
; GCN-NEXT: BB2_4: ; %.zero
; GCN-NEXT: s_cbranch_execnz .LBB2_2
; GCN-NEXT: .LBB2_4: ; %.zero
; GCN-NEXT: s_mov_b32 s7, 0xf000
; GCN-NEXT: s_mov_b32 s6, -1
; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ define amdgpu_gs void @main(<4 x i32> %arg, i32 %arg1) {
; GFX10-LABEL: main:
; GFX10: ; %bb.0: ; %bb
; GFX10-NEXT: s_mov_b32 s1, exec_lo
; GFX10-NEXT: BB0_1: ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1
; GFX10-NEXT: v_readfirstlane_b32 s4, v0
; GFX10-NEXT: v_readfirstlane_b32 s5, v1
; GFX10-NEXT: v_readfirstlane_b32 s6, v2
Expand All @@ -22,7 +22,7 @@ define amdgpu_gs void @main(<4 x i32> %arg, i32 %arg1) {
; GFX10-NEXT: ; implicit-def: $vgpr4
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_xor_b32 exec_lo, exec_lo, s0
; GFX10-NEXT: s_cbranch_execnz BB0_1
; GFX10-NEXT: s_cbranch_execnz .LBB0_1
; GFX10-NEXT: ; %bb.2:
; GFX10-NEXT: s_mov_b32 exec_lo, s1
; GFX10-NEXT: s_waitcnt vmcnt(0)
Expand All @@ -35,7 +35,7 @@ define amdgpu_gs void @main(<4 x i32> %arg, i32 %arg1) {
; GFX9-LABEL: main:
; GFX9: ; %bb.0: ; %bb
; GFX9-NEXT: s_mov_b64 s[2:3], exec
; GFX9-NEXT: BB0_1: ; =>This Inner Loop Header: Depth=1
; GFX9-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1
; GFX9-NEXT: v_readfirstlane_b32 s4, v0
; GFX9-NEXT: v_readfirstlane_b32 s5, v1
; GFX9-NEXT: v_readfirstlane_b32 s6, v2
Expand All @@ -49,7 +49,7 @@ define amdgpu_gs void @main(<4 x i32> %arg, i32 %arg1) {
; GFX9-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX9-NEXT: ; implicit-def: $vgpr4
; GFX9-NEXT: s_xor_b64 exec, exec, s[0:1]
; GFX9-NEXT: s_cbranch_execnz BB0_1
; GFX9-NEXT: s_cbranch_execnz .LBB0_1
; GFX9-NEXT: ; %bb.2:
; GFX9-NEXT: s_mov_b64 exec, s[2:3]
; GFX9-NEXT: s_waitcnt vmcnt(0)
Expand All @@ -61,7 +61,7 @@ define amdgpu_gs void @main(<4 x i32> %arg, i32 %arg1) {
; GFX8-LABEL: main:
; GFX8: ; %bb.0: ; %bb
; GFX8-NEXT: s_mov_b64 s[2:3], exec
; GFX8-NEXT: BB0_1: ; =>This Inner Loop Header: Depth=1
; GFX8-NEXT: .LBB0_1: ; =>This Inner Loop Header: Depth=1
; GFX8-NEXT: v_readfirstlane_b32 s4, v0
; GFX8-NEXT: v_readfirstlane_b32 s5, v1
; GFX8-NEXT: v_readfirstlane_b32 s6, v2
Expand All @@ -75,7 +75,7 @@ define amdgpu_gs void @main(<4 x i32> %arg, i32 %arg1) {
; GFX8-NEXT: ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
; GFX8-NEXT: ; implicit-def: $vgpr4
; GFX8-NEXT: s_xor_b64 exec, exec, s[0:1]
; GFX8-NEXT: s_cbranch_execnz BB0_1
; GFX8-NEXT: s_cbranch_execnz .LBB0_1
; GFX8-NEXT: ; %bb.2:
; GFX8-NEXT: s_mov_b64 exec, s[2:3]
; GFX8-NEXT: s_waitcnt vmcnt(0)
Expand Down
Loading