186 changes: 0 additions & 186 deletions llvm/test/CodeGen/AMDGPU/flat-scratch.ll
@@ -309,21 +309,6 @@ define void @zero_init_foo() {
; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s32
; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-PAL-NEXT: s_setpc_b64 s[30:31]
; GCN-LABEL: zero_init_foo:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_mov_b32 s0, 0
; GCN-NEXT: s_mov_b32 s1, s0
; GCN-NEXT: s_mov_b32 s2, s0
; GCN-NEXT: s_mov_b32 s3, s0
; GCN-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GCN-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
; GCN-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:48
; GCN-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:32
; GCN-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:16
; GCN-NEXT: scratch_store_dwordx4 off, v[0:3], s32
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
%alloca = alloca [32 x i16], align 2, addrspace(5)
call void @llvm.memset.p5.i64(ptr addrspace(5) align 2 dereferenceable(64) %alloca, i8 0, i64 64, i1 false)
ret void
@@ -462,21 +447,6 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
; GFX11-PAL-NEXT: scratch_load_b32 v0, off, s1 glc dlc
; GFX11-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX11-PAL-NEXT: s_endpgm
; GCN-LABEL: store_load_sindex_kernel:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dword s0, s[0:1], 0x24
; GCN-NEXT: v_mov_b32_e32 v0, 15
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_lshl_b32 s1, s0, 2
; GCN-NEXT: s_and_b32 s0, s0, 15
; GCN-NEXT: s_lshl_b32 s0, s0, 2
; GCN-NEXT: s_add_u32 s1, 4, s1
; GCN-NEXT: scratch_store_dword off, v0, s1 sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_add_u32 s0, 4, s0
; GCN-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_endpgm
bb:
%i = alloca [32 x float], align 4, addrspace(5)
%i7 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %idx
@@ -607,19 +577,6 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) {
; GFX11-PAL-NEXT: scratch_load_b32 v0, off, s1 glc dlc
; GFX11-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX11-PAL-NEXT: s_endpgm
; GCN-LABEL: store_load_sindex_foo:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_lshl_b32 s1, s0, 2
; GCN-NEXT: s_and_b32 s0, s0, 15
; GCN-NEXT: s_lshl_b32 s0, s0, 2
; GCN-NEXT: s_add_u32 s1, 4, s1
; GCN-NEXT: v_mov_b32_e32 v0, 15
; GCN-NEXT: scratch_store_dword off, v0, s1 sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_add_u32 s0, 4, s0
; GCN-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_endpgm
bb:
%i = alloca [32 x float], align 4, addrspace(5)
%i7 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %idx
@@ -740,16 +697,6 @@ define amdgpu_kernel void @store_load_vindex_kernel() {
; GFX11-PAL-NEXT: scratch_load_b32 v0, v1, off glc dlc
; GFX11-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX11-PAL-NEXT: s_endpgm
; GCN-LABEL: store_load_vindex_kernel:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 15
; GCN-NEXT: scratch_store_dword v0, v1, off offset:4 sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_sub_u32_e32 v0, 4, v0
; GCN-NEXT: scratch_load_dword v0, v0, off offset:124 sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_endpgm
bb:
%i = alloca [32 x float], align 4, addrspace(5)
%i2 = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -860,18 +807,6 @@ define void @store_load_vindex_foo(i32 %idx) {
; GFX11-PAL-NEXT: scratch_load_b32 v0, v1, s32 glc dlc
; GFX11-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX11-PAL-NEXT: s_setpc_b64 s[30:31]
; GCN-LABEL: store_load_vindex_foo:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v2, 15
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: v_and_b32_e32 v0, v0, v2
; GCN-NEXT: scratch_store_dword v1, v2, s32 sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: scratch_load_dword v0, v0, s32 sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
bb:
%i = alloca [32 x float], align 4, addrspace(5)
%i7 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %idx
@@ -947,13 +882,6 @@ define void @private_ptr_foo(ptr addrspace(5) nocapture %arg) {
; GFX11-PAL-NEXT: scratch_store_b32 v0, v1, off
; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-PAL-NEXT: s_setpc_b64 s[30:31]
; GCN-LABEL: private_ptr_foo:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v1, 0x41200000
; GCN-NEXT: scratch_store_dword v0, v1, off offset:4
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
%gep = getelementptr inbounds float, ptr addrspace(5) %arg, i32 1
store float 1.000000e+01, ptr addrspace(5) %gep, align 4
ret void
@@ -1296,23 +1224,6 @@ define void @zero_init_small_offset_foo() {
; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s32 offset:304
; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-PAL-NEXT: s_setpc_b64 s[30:31]
; GCN-LABEL: zero_init_small_offset_foo:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: scratch_load_dword v0, off, s32 sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_mov_b32 s0, 0
; GCN-NEXT: s_mov_b32 s1, s0
; GCN-NEXT: s_mov_b32 s2, s0
; GCN-NEXT: s_mov_b32 s3, s0
; GCN-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GCN-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
; GCN-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:256
; GCN-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:272
; GCN-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:288
; GCN-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:304
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
%padding = alloca [64 x i32], align 4, addrspace(5)
%alloca = alloca [32 x i16], align 2, addrspace(5)
%pad_gep = getelementptr inbounds [64 x i32], ptr addrspace(5) %padding, i32 0, i32 undef
@@ -1970,20 +1881,6 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
; GFX11-PAL-NEXT: scratch_load_b32 v0, v1, s32 offset:256 glc dlc
; GFX11-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX11-PAL-NEXT: s_setpc_b64 s[30:31]
; GCN-LABEL: store_load_vindex_small_offset_foo:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: scratch_load_dword v1, off, s32 sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v2, 15
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: v_and_b32_e32 v0, v0, v2
; GCN-NEXT: scratch_store_dword v1, v2, s32 offset:256 sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: scratch_load_dword v0, v0, s32 offset:256 sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
bb:
%padding = alloca [64 x i32], align 4, addrspace(5)
%i = alloca [32 x float], align 4, addrspace(5)
@@ -3048,22 +2945,6 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
; GFX11-PAL-NEXT: scratch_load_b32 v0, v1, vcc_lo glc dlc
; GFX11-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX11-PAL-NEXT: s_setpc_b64 s[30:31]
; GCN-LABEL: store_load_vindex_large_offset_foo:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: scratch_load_dword v1, off, s32 sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v2, 15
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: v_and_b32_e32 v0, v0, v2
; GCN-NEXT: s_add_u32 vcc_hi, s32, 0x4000
; GCN-NEXT: scratch_store_dword v1, v2, vcc_hi sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: s_add_u32 vcc_hi, s32, 0x4000
; GCN-NEXT: scratch_load_dword v0, v0, vcc_hi sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
bb:
%padding = alloca [4096 x i32], align 4, addrspace(5)
%i = alloca [32 x float], align 4, addrspace(5)
@@ -3350,19 +3231,6 @@ define void @store_load_large_imm_offset_foo() {
; GFX11-PAL-NEXT: scratch_load_b32 v0, off, s0 offset:3712 glc dlc
; GFX11-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX11-PAL-NEXT: s_setpc_b64 s[30:31]
; GCN-LABEL: store_load_large_imm_offset_foo:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, 13
; GCN-NEXT: scratch_store_dword off, v0, s32 sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, 0x3000
; GCN-NEXT: v_mov_b32_e32 v1, 15
; GCN-NEXT: scratch_store_dword v0, v1, s32 offset:3712 sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: scratch_load_dword v0, v0, s32 offset:3712 sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
bb:
%i = alloca [4096 x i32], align 4, addrspace(5)
%i1 = getelementptr inbounds [4096 x i32], ptr addrspace(5) %i, i32 0, i32 undef
@@ -3498,17 +3366,6 @@ define amdgpu_kernel void @store_load_vidx_sidx_offset(i32 %sidx) {
; GFX11-PAL-NEXT: scratch_load_b32 v0, v0, off glc dlc
; GFX11-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX11-PAL-NEXT: s_endpgm
; GCN-LABEL: store_load_vidx_sidx_offset:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dword s0, s[0:1], 0x24
; GCN-NEXT: v_mov_b32_e32 v1, 15
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_add_lshl_u32 v0, s0, v0, 2
; GCN-NEXT: scratch_store_dword v0, v1, off offset:1028 sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: scratch_load_dword v0, v0, off offset:1028 sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_endpgm
bb:
%alloca = alloca [32 x i32], align 4, addrspace(5)
%vidx = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -3599,16 +3456,6 @@ define void @store_load_i64_aligned(ptr addrspace(5) nocapture %arg) {
; GFX11-PAL-NEXT: scratch_load_b64 v[0:1], v0, off glc dlc
; GFX11-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX11-PAL-NEXT: s_setpc_b64 s[30:31]
; GCN-LABEL: store_load_i64_aligned:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v2, 15
; GCN-NEXT: v_mov_b32_e32 v3, 0
; GCN-NEXT: scratch_store_dwordx2 v0, v[2:3], off sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: scratch_load_dwordx2 v[0:1], v0, off sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
bb:
store volatile i64 15, ptr addrspace(5) %arg, align 8
%load = load volatile i64, ptr addrspace(5) %arg, align 8
@@ -3694,16 +3541,6 @@ define void @store_load_i64_unaligned(ptr addrspace(5) nocapture %arg) {
; GFX11-PAL-NEXT: scratch_load_b64 v[0:1], v0, off glc dlc
; GFX11-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX11-PAL-NEXT: s_setpc_b64 s[30:31]
; GCN-LABEL: store_load_i64_unaligned:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v2, 15
; GCN-NEXT: v_mov_b32_e32 v3, 0
; GCN-NEXT: scratch_store_dwordx2 v0, v[2:3], off sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: scratch_load_dwordx2 v[0:1], v0, off sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
bb:
store volatile i64 15, ptr addrspace(5) %arg, align 1
%load = load volatile i64, ptr addrspace(5) %arg, align 1
@@ -3796,17 +3633,6 @@ define void @store_load_v3i32_unaligned(ptr addrspace(5) nocapture %arg) {
; GFX11-PAL-NEXT: scratch_load_b96 v[0:2], v0, off glc dlc
; GFX11-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX11-PAL-NEXT: s_setpc_b64 s[30:31]
; GCN-LABEL: store_load_v3i32_unaligned:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v2, 1
; GCN-NEXT: v_mov_b32_e32 v3, 2
; GCN-NEXT: v_mov_b32_e32 v4, 3
; GCN-NEXT: scratch_store_dwordx3 v0, v[2:4], off sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: scratch_load_dwordx3 v[0:2], v0, off sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
bb:
store volatile <3 x i32> <i32 1, i32 2, i32 3>, ptr addrspace(5) %arg, align 1
%load = load volatile <3 x i32>, ptr addrspace(5) %arg, align 1
@@ -3904,18 +3730,6 @@ define void @store_load_v4i32_unaligned(ptr addrspace(5) nocapture %arg) {
; GFX11-PAL-NEXT: scratch_load_b128 v[0:3], v0, off glc dlc
; GFX11-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX11-PAL-NEXT: s_setpc_b64 s[30:31]
; GCN-LABEL: store_load_v4i32_unaligned:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v2, 1
; GCN-NEXT: v_mov_b32_e32 v3, 2
; GCN-NEXT: v_mov_b32_e32 v4, 3
; GCN-NEXT: v_mov_b32_e32 v5, 4
; GCN-NEXT: scratch_store_dwordx4 v0, v[2:5], off sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: scratch_load_dwordx4 v[0:3], v0, off sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
bb:
store volatile <4 x i32> <i32 1, i32 2, i32 3, i32 4>, ptr addrspace(5) %arg, align 1
%load = load volatile <4 x i32>, ptr addrspace(5) %arg, align 1
20 changes: 0 additions & 20 deletions llvm/test/CodeGen/AMDGPU/fneg-combines.legal.f16.ll
@@ -82,15 +82,6 @@ define half @v_fneg_amdgcn_sin_f16(half %a) #0 {
; --------------------------------------------------------------------------------

define { float, float } @v_fneg_interp_p1_f16(float %a, float %b) #0 {
; SI-LABEL: v_fneg_interp_p1_f16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_mul_f16_e64 v1, v0, -v1
; SI-NEXT: s_mov_b32 m0, 0
; SI-NEXT: v_interp_p1_f16 v0, v1, attr0.x
; SI-NEXT: v_interp_p1_f16 v1, v1, attr0.y
; SI-NEXT: s_setpc_b64 s[30:31]
;
; GCN-LABEL: v_fneg_interp_p1_f16:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -110,17 +101,6 @@ define { float, float } @v_fneg_interp_p1_f16(float %a, float %b) #0 {
}

define { half, half } @v_fneg_interp_p2_f16(float %a, float %b) #0 {
; SI-LABEL: v_fneg_interp_p2_f16:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_mul_f16_e64 v2, v0, -v1
; SI-NEXT: v_mov_b32_e32 v1, 4.0
; SI-NEXT: v_mov_b32_e32 v0, 4.0
; SI-NEXT: s_mov_b32 m0, 0
; SI-NEXT: v_interp_p2_f16 v0, v2, attr0.x
; SI-NEXT: v_interp_p2_f16 v1, v2, attr0.y
; SI-NEXT: s_setpc_b64 s[30:31]
;
; GCN-LABEL: v_fneg_interp_p2_f16:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1 change: 0 additions & 1 deletion llvm/test/CodeGen/AMDGPU/fp-min-max-buffer-atomics.ll
@@ -362,7 +362,6 @@ define amdgpu_kernel void @raw_buffer_atomic_min_rtn_f32_off4_slc(<4 x i32> inre
; G_GFX1100-NEXT: s_waitcnt vmcnt(0)
; G_GFX1100-NEXT: ds_store_b32 v1, v0
; G_GFX1100-NEXT: s_endpgm
; GFX1010-LABEL: raw_buffer_atomic_min_rtn_f32_off4_slc:
main_body:
%ret = call float @llvm.amdgcn.raw.buffer.atomic.fmin.f32(float %data, <4 x i32> %rsrc, i32 %vindex, i32 4, i32 2)
store float %ret, ptr addrspace(3) %out, align 8
5 changes: 0 additions & 5 deletions llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll
@@ -213,11 +213,6 @@ define amdgpu_gfx void @void_func_void_clobber_s28_s29() #1 {
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_setpc_b64 s[30:31]
call void asm sideeffect "; clobber", "~{s[30:31]}"() #0
; GCN: v_writelane_b32 v0, s28, 0
; GCN: v_writelane_b32 v0, s29, 1

; GCN: v_readlane_b32 s28, v0, 0
; GCN: v_readlane_b32 s29, v0, 1
call void asm sideeffect "; clobber", "~{s[28:29]}"() #0
ret void
}
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/hazard-inlineasm.mir
@@ -6,7 +6,7 @@

...

# GCN-LABEL: name: hazard-inlineasm
# CHECK-LABEL: name: hazard-inlineasm
# CHECK: FLAT_STORE_DWORDX4
# CHECK-NEXT: S_NOP 0
# CHECK-NEXT: INLINEASM
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/hazard-kill.mir
@@ -8,8 +8,8 @@
define amdgpu_ps void @_amdgpu_ps_main() #0 { ret void }
...
---
# CHECK-LABEL: name: _amdgpu_ps_main
# CHECK-LABEL: bb.0:
# GFX90-LABEL: name: _amdgpu_ps_main
# GFX90-LABEL: bb.0:
# GFX90: $m0 = S_MOV_B32 killed renamable $sgpr4
# GFX90-NEXT: KILL undef renamable $sgpr2
# GFX90-NEXT: S_MOV_B32 0
357 changes: 0 additions & 357 deletions llvm/test/CodeGen/AMDGPU/idot8s.ll

Large diffs are not rendered by default.

8 changes: 0 additions & 8 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fcmp.w64.ll
@@ -129,10 +129,6 @@ define amdgpu_kernel void @v_fcmp_f32_oeq_both_operands_with_fabs(ptr addrspace(
}

define amdgpu_kernel void @v_fcmp_f32(ptr addrspace(1) %out, float %src) {
; SDAG-GFX-LABEL: v_fcmp_f32:
; SDAG-GFX: ; %bb.0:
; SDAG-GFX-NEXT: s_endpgm
;
; GFX11-SDAG-LABEL: v_fcmp_f32:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_endpgm
@@ -1970,10 +1966,6 @@ define amdgpu_kernel void @v_fcmp_f16_oeq_both_operands_with_fabs(ptr addrspace(
}

define amdgpu_kernel void @v_fcmp_f16(ptr addrspace(1) %out, half %src) {
; SDAG-GFX-LABEL: v_fcmp_f16:
; SDAG-GFX: ; %bb.0:
; SDAG-GFX-NEXT: s_endpgm
;
; GFX11-SDAG-LABEL: v_fcmp_f16:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_endpgm
7 changes: 0 additions & 7 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fma.legacy.ll
@@ -23,13 +23,6 @@ define float @v_fma(float %a, float %b, float %c) {
}

define float @v_fma_imm(float %a, float %c) {
; GCN-LABEL: v_fma_imm:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_waitcnt_vscnt null, 0x0
; GCN-NEXT: v_fmac_legacy_f32_e32 v1, 0x41200000, v0
; GCN-NEXT: v_mov_b32_e32 v0, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
; GFX10-LABEL: v_fma_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
84 changes: 0 additions & 84 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.dim.ll
@@ -3508,34 +3508,6 @@ main_body:
}

define amdgpu_ps <3 x float> @getresinfo_dmask7(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %mip) {
; GFX6-LABEL: getresinfo_dmask7:
; GFX6: ; %bb.0: ; %main_body
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_mov_b32 s2, s4
; GFX6-NEXT: s_mov_b32 s3, s5
; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s7
; GFX6-NEXT: s_mov_b32 s6, s8
; GFX6-NEXT: s_mov_b32 s7, s9
; GFX6-NEXT: image_get_resinfo v[0:2], v0, s[0:7] dmask:0x7 unorm
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: getresinfo_dmask7:
; GFX8: ; %bb.0: ; %main_body
; GFX8-NEXT: s_mov_b32 s0, s2
; GFX8-NEXT: s_mov_b32 s1, s3
; GFX8-NEXT: s_mov_b32 s2, s4
; GFX8-NEXT: s_mov_b32 s3, s5
; GFX8-NEXT: s_mov_b32 s4, s6
; GFX8-NEXT: s_mov_b32 s5, s7
; GFX8-NEXT: s_mov_b32 s6, s8
; GFX8-NEXT: s_mov_b32 s7, s9
; GFX8-NEXT: image_get_resinfo v[0:2], v0, s[0:7] dmask:0x7 unorm
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: ; return to shader part epilog
;
; VERDE-LABEL: getresinfo_dmask7:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_get_resinfo v[0:2], v0, s[0:7] dmask:0x7 unorm
@@ -3571,34 +3543,6 @@ main_body:
}

define amdgpu_ps <2 x float> @getresinfo_dmask3(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %mip) {
; GFX6-LABEL: getresinfo_dmask3:
; GFX6: ; %bb.0: ; %main_body
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_mov_b32 s2, s4
; GFX6-NEXT: s_mov_b32 s3, s5
; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s7
; GFX6-NEXT: s_mov_b32 s6, s8
; GFX6-NEXT: s_mov_b32 s7, s9
; GFX6-NEXT: image_get_resinfo v[0:1], v0, s[0:7] dmask:0x3 unorm
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: getresinfo_dmask3:
; GFX8: ; %bb.0: ; %main_body
; GFX8-NEXT: s_mov_b32 s0, s2
; GFX8-NEXT: s_mov_b32 s1, s3
; GFX8-NEXT: s_mov_b32 s2, s4
; GFX8-NEXT: s_mov_b32 s3, s5
; GFX8-NEXT: s_mov_b32 s4, s6
; GFX8-NEXT: s_mov_b32 s5, s7
; GFX8-NEXT: s_mov_b32 s6, s8
; GFX8-NEXT: s_mov_b32 s7, s9
; GFX8-NEXT: image_get_resinfo v[0:1], v0, s[0:7] dmask:0x3 unorm
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: ; return to shader part epilog
;
; VERDE-LABEL: getresinfo_dmask3:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_get_resinfo v[0:1], v0, s[0:7] dmask:0x3 unorm
@@ -3634,34 +3578,6 @@ main_body:
}

define amdgpu_ps float @getresinfo_dmask1(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %mip) {
; GFX6-LABEL: getresinfo_dmask1:
; GFX6: ; %bb.0: ; %main_body
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_mov_b32 s2, s4
; GFX6-NEXT: s_mov_b32 s3, s5
; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s7
; GFX6-NEXT: s_mov_b32 s6, s8
; GFX6-NEXT: s_mov_b32 s7, s9
; GFX6-NEXT: image_get_resinfo v0, v0, s[0:7] dmask:0x1 unorm
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: getresinfo_dmask1:
; GFX8: ; %bb.0: ; %main_body
; GFX8-NEXT: s_mov_b32 s0, s2
; GFX8-NEXT: s_mov_b32 s1, s3
; GFX8-NEXT: s_mov_b32 s2, s4
; GFX8-NEXT: s_mov_b32 s3, s5
; GFX8-NEXT: s_mov_b32 s4, s6
; GFX8-NEXT: s_mov_b32 s5, s7
; GFX8-NEXT: s_mov_b32 s6, s8
; GFX8-NEXT: s_mov_b32 s7, s9
; GFX8-NEXT: image_get_resinfo v0, v0, s[0:7] dmask:0x1 unorm
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: ; return to shader part epilog
;
; VERDE-LABEL: getresinfo_dmask1:
; VERDE: ; %bb.0: ; %main_body
; VERDE-NEXT: image_get_resinfo v0, v0, s[0:7] dmask:0x1 unorm
19 changes: 0 additions & 19 deletions llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ubfe.ll
@@ -1044,7 +1044,6 @@ define amdgpu_kernel void @bfe_u32_test_14(ptr addrspace(1) %out, ptr addrspace(
store i32 %bfe, ptr addrspace(1) %out, align 4 ret void
}

; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_0(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_0:
; SI: ; %bb.0:
@@ -1070,7 +1069,6 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_0(ptr addrspace(1) %out) #
ret void
}

; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_1(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_1:
; SI: ; %bb.0:
@@ -1096,7 +1094,6 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_1(ptr addrspace(1) %out) #
ret void
}

; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_2(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_2:
; SI: ; %bb.0:
@@ -1122,7 +1119,6 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_2(ptr addrspace(1) %out) #
ret void
}

; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_3(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_3:
; SI: ; %bb.0:
@@ -1148,7 +1144,6 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_3(ptr addrspace(1) %out) #
ret void
}

; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_4(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_4:
; SI: ; %bb.0:
@@ -1174,7 +1169,6 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_4(ptr addrspace(1) %out) #
ret void
}

; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_5(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_5:
; SI: ; %bb.0:
@@ -1200,7 +1194,6 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_5(ptr addrspace(1) %out) #
ret void
}

; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_6(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_6:
; SI: ; %bb.0:
@@ -1226,7 +1219,6 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_6(ptr addrspace(1) %out) #
ret void
}

; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_7(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_7:
; SI: ; %bb.0:
@@ -1252,7 +1244,6 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_7(ptr addrspace(1) %out) #
ret void
}

; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_8(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_8:
; SI: ; %bb.0:
@@ -1278,7 +1269,6 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_8(ptr addrspace(1) %out) #
ret void
}

; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_9(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_9:
; SI: ; %bb.0:
@@ -1304,7 +1294,6 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_9(ptr addrspace(1) %out) #
ret void
}

; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_10(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_10:
; SI: ; %bb.0:
@@ -1330,7 +1319,6 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_10(ptr addrspace(1) %out)
ret void
}

; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_11(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_11:
; SI: ; %bb.0:
@@ -1356,7 +1344,6 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_11(ptr addrspace(1) %out)
ret void
}

; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_12(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_12:
; SI: ; %bb.0:
@@ -1382,7 +1369,6 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_12(ptr addrspace(1) %out)
ret void
}

; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_13(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_13:
; SI: ; %bb.0:
@@ -1408,7 +1394,6 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_13(ptr addrspace(1) %out)
ret void
}

; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_14(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_14:
; SI: ; %bb.0:
@@ -1434,7 +1419,6 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_14(ptr addrspace(1) %out)
ret void
}

; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_15(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_15:
; SI: ; %bb.0:
@@ -1460,7 +1444,6 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_15(ptr addrspace(1) %out)
ret void
}

; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_16(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_16:
; SI: ; %bb.0:
@@ -1486,7 +1469,6 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_16(ptr addrspace(1) %out)
ret void
}

; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_17(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_17:
; SI: ; %bb.0:
@@ -1512,7 +1494,6 @@ define amdgpu_kernel void @bfe_u32_constant_fold_test_17(ptr addrspace(1) %out)
ret void
}

; EG-NOT: BFE
define amdgpu_kernel void @bfe_u32_constant_fold_test_18(ptr addrspace(1) %out) #0 {
; SI-LABEL: bfe_u32_constant_fold_test_18:
; SI: ; %bb.0:
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AMDGPU/noclobber-barrier.ll
@@ -543,8 +543,8 @@
}

; GCN-LABEL: {{^}}no_alias_atomic_rmw_then_clobber:
; CGN: global_store_dword
; CGN: global_store_dword
; GCN: global_store_dword
; GCN: global_store_dword
; GCN: ds_add_u32
; GCN: global_load_dword
; GCN: global_store_dword
@@ -574,7 +574,7 @@
}

; GCN-LABEL: {{^}}no_alias_atomic_rmw_then_no_alias_store:
; CGN: global_store_dword
; GCN: global_store_dword
; GCN: ds_add_u32
; GCN: s_load_dword s
; GCN-NOT: global_load_dword
389 changes: 0 additions & 389 deletions llvm/test/CodeGen/AMDGPU/sgpr-spill.mir

Large diffs are not rendered by default.

445 changes: 0 additions & 445 deletions llvm/test/CodeGen/AMDGPU/spill-agpr.mir

Large diffs are not rendered by default.

74 changes: 0 additions & 74 deletions llvm/test/CodeGen/AMDGPU/wqm.ll
@@ -1709,16 +1709,6 @@
}

; ... but only if WQM is necessary.
; CHECK-LABEL: {{^}}test_kill_1:
; CHECK-NEXT: ; %main_body
; CHECK: s_mov_b64 [[ORIG:s\[[0-9]+:[0-9]+\]]], exec
; CHECK: s_wqm_b64 exec, exec
; CHECK: image_sample
; CHECK: s_and_b64 exec, exec, [[ORIG]]
; CHECK: image_sample
; CHECK-NOT: wqm
; CHECK-DAG: buffer_store_dword
; CHECK-DAG: v_cmp_
define amdgpu_ps <4 x float> @test_kill_1(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, i32 %idx, float %data, float %coord, float %coord2, float %z) {
; GFX9-W64-LABEL: test_kill_1:
; GFX9-W64: ; %bb.0: ; %main_body
@@ -1782,11 +1772,6 @@ main_body:
}

; Check prolog shaders.
; CHECK-LABEL: {{^}}test_prolog_1:
; CHECK: s_mov_b64 [[ORIG:s\[[0-9]+:[0-9]+\]]], exec
; CHECK: s_wqm_b64 exec, exec
; CHECK: v_add_f32_e32 v0,
; CHECK: s_and_b64 exec, exec, [[ORIG]]
define amdgpu_ps float @test_prolog_1(float %a, float %b) #5 {
; GFX9-W64-LABEL: test_prolog_1:
; GFX9-W64: ; %bb.0: ; %main_body
@@ -1808,28 +1793,6 @@ main_body:
ret float %s
}

; CHECK-LABEL: {{^}}test_loop_vcc:
; CHECK-NEXT: ; %entry
; CHECK-NEXT: s_mov_b64 [[LIVE:s\[[0-9]+:[0-9]+\]]], exec
; CHECK: s_wqm_b64 exec, exec
; CHECK: v_mov
; CHECK: v_mov
; CHECK: v_mov
; CHECK: v_mov
; CHECK: s_and_b64 exec, exec, [[LIVE]]
; CHECK: image_store
; CHECK: s_wqm_b64 exec, exec
; CHECK-DAG: v_mov_b32_e32 [[CTR:v[0-9]+]], 0
; CHECK-DAG: s_mov_b32 [[SEVEN:s[0-9]+]], 0x40e00000

; CHECK: [[LOOPHDR:.LBB[0-9]+_[0-9]+]]: ; %body
; CHECK: v_add_f32_e32 [[CTR]], 2.0, [[CTR]]
; CHECK: [[LOOP:.LBB[0-9]+_[0-9]+]]: ; %loop
; CHECK: v_cmp_lt_f32_e32 vcc, [[SEVEN]], [[CTR]]
; CHECK: s_cbranch_vccz [[LOOPHDR]]

; CHECK: ; %break
; CHECK: ; return
define amdgpu_ps <4 x float> @test_loop_vcc(<4 x float> %in) nounwind {
; GFX9-W64-LABEL: test_loop_vcc:
; GFX9-W64: ; %bb.0: ; %entry
@@ -1925,22 +1888,6 @@ break:

; Only intrinsic stores need exact execution -- other stores do not have
; externally visible effects and may require WQM for correctness.
; CHECK-LABEL: {{^}}test_alloca:
; CHECK: s_mov_b64 [[LIVE:s\[[0-9]+:[0-9]+\]]], exec
; CHECK: s_wqm_b64 exec, exec

; CHECK: s_and_b64 exec, exec, [[LIVE]]
; CHECK: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0
; CHECK: s_wqm_b64 exec, exec
; CHECK: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:4{{$}}
; CHECK: s_and_b64 exec, exec, [[LIVE]]
; CHECK: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 idxen
; CHECK: s_wqm_b64 exec, exec
; CHECK: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0 offen

; CHECK: s_and_b64 exec, exec, [[LIVE]]
; CHECK: image_sample
; CHECK: buffer_store_dwordx4
define amdgpu_ps void @test_alloca(float %data, i32 %a, i32 %idx) nounwind {
; GFX9-W64-LABEL: test_alloca:
; GFX9-W64: ; %bb.0: ; %entry
@@ -2020,11 +1967,6 @@ entry:
; otherwise the EXEC mask exported by the epilog will be wrong. This is true
; even if the shader has no kills, because a kill could have happened in a
; previous shader fragment.
; CHECK-LABEL: {{^}}test_nonvoid_return:
; CHECK: s_mov_b64 [[LIVE:s\[[0-9]+:[0-9]+\]]], exec
; CHECK: s_wqm_b64 exec, exec
; CHECK: s_and_b64 exec, exec, [[LIVE]]
; CHECK-NOT: exec
define amdgpu_ps <4 x float> @test_nonvoid_return() nounwind {
; GFX9-W64-LABEL: test_nonvoid_return:
; GFX9-W64: ; %bb.0:
Expand Down Expand Up @@ -2053,11 +1995,6 @@ define amdgpu_ps <4 x float> @test_nonvoid_return() nounwind {
ret <4 x float> %dtex
}

; CHECK-LABEL: {{^}}test_nonvoid_return_unreachable:
; CHECK: s_mov_b64 [[LIVE:s\[[0-9]+:[0-9]+\]]], exec
; CHECK: s_wqm_b64 exec, exec
; CHECK: s_and_b64 exec, exec, [[LIVE]]
; CHECK-NOT: exec
define amdgpu_ps <4 x float> @test_nonvoid_return_unreachable(i32 inreg %c) nounwind {
; GFX9-W64-LABEL: test_nonvoid_return_unreachable:
; GFX9-W64: ; %bb.0: ; %entry
@@ -2110,17 +2047,6 @@ else:
}

; Test awareness that s_wqm_b64 clobbers SCC.
; CHECK-LABEL: {{^}}test_scc:
; CHECK: s_mov_b64 [[ORIG:s\[[0-9]+:[0-9]+\]]], exec
; CHECK: s_wqm_b64 exec, exec
; CHECK: s_cmp_
; CHECK-NEXT: s_cbranch_scc
; CHECK: ; %else
; CHECK: image_sample
; CHECK: ; %if
; CHECK: image_sample
; CHECK: ; %end
; CHECK: s_and_b64 exec, exec, [[ORIG]]
define amdgpu_ps <4 x float> @test_scc(i32 inreg %sel, i32 %idx) #1 {
; GFX9-W64-LABEL: test_scc:
; GFX9-W64: ; %bb.0: ; %main_body