278 changes: 139 additions & 139 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll

Large diffs are not rendered by default.

527 changes: 263 additions & 264 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll

Large diffs are not rendered by default.

292 changes: 146 additions & 146 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/widen-i8-i16-scalar-loads.ll

Large diffs are not rendered by default.

545 changes: 273 additions & 272 deletions llvm/test/CodeGen/AMDGPU/add.ll

Large diffs are not rendered by default.

268 changes: 134 additions & 134 deletions llvm/test/CodeGen/AMDGPU/add.v2i16.ll

Large diffs are not rendered by default.

16 changes: 8 additions & 8 deletions llvm/test/CodeGen/AMDGPU/amd.endpgm.ll
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,11 @@ define amdgpu_kernel void @test2(ptr %p, i32 %x) {
; GFX9-NEXT: s_cmp_lt_i32 s2, 1
; GFX9-NEXT: s_cbranch_scc0 .LBB2_2
; GFX9-NEXT: ; %bb.1: ; %else
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
; GFX9-NEXT: v_mov_b32_e32 v2, s2
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s0
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: v_mov_b32_e32 v1, s5
; GFX9-NEXT: flat_store_dword v[0:1], v2
; GFX9-NEXT: s_endpgm
; GFX9-NEXT: .LBB2_2: ; %then
Expand All @@ -63,11 +63,11 @@ define amdgpu_kernel void @test2(ptr %p, i32 %x) {
; GFX10-NEXT: s_cmp_lt_i32 s2, 1
; GFX10-NEXT: s_cbranch_scc0 .LBB2_2
; GFX10-NEXT: ; %bb.1: ; %else
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
; GFX10-NEXT: v_mov_b32_e32 v2, s2
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, s0
; GFX10-NEXT: v_mov_b32_e32 v1, s1
; GFX10-NEXT: v_mov_b32_e32 v0, s4
; GFX10-NEXT: v_mov_b32_e32 v1, s5
; GFX10-NEXT: flat_store_dword v[0:1], v2
; GFX10-NEXT: s_endpgm
; GFX10-NEXT: .LBB2_2: ; %then
Expand All @@ -80,10 +80,10 @@ define amdgpu_kernel void @test2(ptr %p, i32 %x) {
; GFX11-NEXT: s_cmp_lt_i32 s2, 1
; GFX11-NEXT: s_cbranch_scc0 .LBB2_2
; GFX11-NEXT: ; %bb.1: ; %else
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x24
; GFX11-NEXT: v_mov_b32_e32 v2, s2
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; GFX11-NEXT: flat_store_b32 v[0:1], v2
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: .LBB2_2: ; %then
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/amdgcn-load-offset-from-reg.ll
Original file line number Diff line number Diff line change
Expand Up @@ -72,12 +72,12 @@ define amdgpu_ps void @test_sgpr_plus_imm_offset(ptr addrspace(4) inreg %base, i
; SDAG-DAG: %[[BASE1:.*]]:sgpr_32 = COPY $sgpr1
; SDAG-DAG: %[[OFFSET:.*]]:sgpr_32 = COPY $sgpr2
; SDAG-DAG: %[[BASE:.*]]:sgpr_64 = REG_SEQUENCE %[[BASE0]], %subreg.sub0, %[[BASE1]], %subreg.sub1
; SDAG: S_LOAD_DWORDX2_SGPR_IMM killed %[[BASE]], %[[OFFSET]], 16,
; SDAG: S_LOAD_DWORDX2_SGPR_IMM_ec killed %[[BASE]], %[[OFFSET]], 16,
; GISEL-DAG: %[[BASE0:.*]]:sreg_32 = COPY $sgpr0
; GISEL-DAG: %[[BASE1:.*]]:sreg_32 = COPY $sgpr1
; GISEL-DAG: %[[OFFSET:.*]]:sreg_32 = COPY $sgpr2
; GISEL-DAG: %[[BASE:.*]]:sreg_64 = REG_SEQUENCE %[[BASE0]], %subreg.sub0, %[[BASE1]], %subreg.sub1
; GISEL: S_LOAD_DWORDX2_SGPR_IMM %[[BASE]], %[[OFFSET]], 16,
; GISEL: S_LOAD_DWORDX2_SGPR_IMM_ec %[[BASE]], %[[OFFSET]], 16,
define amdgpu_ps void @test_sgpr_plus_imm_offset_x2(ptr addrspace(4) inreg %base, i32 inreg %offset,
ptr addrspace(1) inreg %out) {
%v1 = getelementptr i8, ptr addrspace(4) %base, i64 16
Expand Down
1,225 changes: 614 additions & 611 deletions llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll

Large diffs are not rendered by default.

12 changes: 6 additions & 6 deletions llvm/test/CodeGen/AMDGPU/and.ll
Original file line number Diff line number Diff line change
Expand Up @@ -227,10 +227,10 @@ define amdgpu_kernel void @s_and_32_bit_constant_i64(ptr addrspace(1) %out, i32,
; SI: s_load_dword [[B:s[0-9]+]]
; SI: s_load_dwordx2
; SI-NOT: and
; SI: s_lshl_b32 [[A]], [[A]], 1
; SI: s_lshl_b32 [[B]], [[B]], 1
; SI: s_and_b32 s{{[0-9]+}}, [[A]], 62
; SI: s_and_b32 s{{[0-9]+}}, [[B]], 62
; SI: s_lshl_b32 [[C:s[0-9]+]], [[A]], 1
; SI: s_lshl_b32 [[D:s[0-9]+]], [[B]], 1
; SI: s_and_b32 s{{[0-9]+}}, [[C]], 62
; SI: s_and_b32 s{{[0-9]+}}, [[D]], 62
; SI-NOT: and
; SI: buffer_store_dwordx2
define amdgpu_kernel void @s_and_multi_use_inline_imm_i64(ptr addrspace(1) %out, i32, i64 %a, i32, i64 %b, i32, i64 %c) {
Expand Down Expand Up @@ -371,9 +371,9 @@ define amdgpu_kernel void @s_and_inline_imm_64_i64(ptr addrspace(1) %out, ptr ad

; FUNC-LABEL: {{^}}s_and_inline_imm_64_i64_noshrink:
; SI: s_load_dword [[A:s[0-9]+]]
; SI: s_lshl_b32 [[A]], [[A]], 1{{$}}
; SI: s_lshl_b32 [[B:s[0-9]+]], [[A]], 1{{$}}
; SI-NOT: and
; SI: s_and_b32 s{{[0-9]+}}, [[A]], 64
; SI: s_and_b32 s{{[0-9]+}}, [[B]], 64
; SI-NOT: and
; SI: s_add_u32
; SI-NEXT: s_addc_u32
Expand Down
22 changes: 11 additions & 11 deletions llvm/test/CodeGen/AMDGPU/anyext.ll
Original file line number Diff line number Diff line change
Expand Up @@ -22,17 +22,17 @@ define amdgpu_kernel void @anyext_i1_i32(ptr addrspace(1) %out, i32 %cond) #0 {
;
; GFX8-LABEL: anyext_i1_i32:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_load_dword s4, s[0:1], 0x2c
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX8-NEXT: s_mov_b32 s3, 0xf000
; GFX8-NEXT: s_mov_b32 s2, -1
; GFX8-NEXT: s_load_dword s2, s[0:1], 0x2c
; GFX8-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
; GFX8-NEXT: s_mov_b32 s7, 0xf000
; GFX8-NEXT: s_mov_b32 s6, -1
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_cmp_eq_u32 s4, 0
; GFX8-NEXT: s_cselect_b64 s[4:5], -1, 0
; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX8-NEXT: s_cmp_eq_u32 s2, 0
; GFX8-NEXT: s_cselect_b64 s[0:1], -1, 0
; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX8-NEXT: v_not_b32_e32 v0, v0
; GFX8-NEXT: v_and_b32_e32 v0, 1, v0
; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX8-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX8-NEXT: s_endpgm
;
; GFX9-LABEL: anyext_i1_i32:
Expand Down Expand Up @@ -89,15 +89,15 @@ define amdgpu_kernel void @s_anyext_i16_i32(ptr addrspace(1) %out, ptr addrspace
; GFX8-LABEL: s_anyext_i16_i32:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
; GFX8-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v3, s7
; GFX8-NEXT: v_add_u32_e32 v2, vcc, s6, v0
; GFX8-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 1, v1
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v0
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s2, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_ushort v2, v[2:3]
; GFX8-NEXT: flat_load_ushort v0, v[0:1]
Expand Down
524 changes: 271 additions & 253 deletions llvm/test/CodeGen/AMDGPU/atomic_optimizations_buffer.ll

Large diffs are not rendered by default.

2,005 changes: 1,000 additions & 1,005 deletions llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll

Large diffs are not rendered by default.

2,155 changes: 1,095 additions & 1,060 deletions llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll

Large diffs are not rendered by default.

456 changes: 236 additions & 220 deletions llvm/test/CodeGen/AMDGPU/atomic_optimizations_raw_buffer.ll

Large diffs are not rendered by default.

526 changes: 272 additions & 254 deletions llvm/test/CodeGen/AMDGPU/atomic_optimizations_struct_buffer.ll

Large diffs are not rendered by default.

56 changes: 28 additions & 28 deletions llvm/test/CodeGen/AMDGPU/atomics_cond_sub.ll
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,12 @@ define amdgpu_kernel void @flat_atomic_cond_sub_rtn_u32(ptr %addr, i32 %in, ptr
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_clause 0x1
; GFX12-SDAG-NEXT: s_load_b96 s[4:6], s[0:1], 0x24
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x34
; GFX12-SDAG-NEXT: s_load_b64 s[2:3], s[0:1], 0x34
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, s6
; GFX12-SDAG-NEXT: flat_atomic_cond_sub_u32 v2, v[0:1], v2 offset:16 th:TH_ATOMIC_RETURN
; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX12-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-SDAG-NEXT: flat_store_b32 v[0:1], v2
; GFX12-SDAG-NEXT: s_endpgm
Expand All @@ -73,12 +73,12 @@ define amdgpu_kernel void @flat_atomic_cond_sub_rtn_u32(ptr %addr, i32 %in, ptr
; GFX12-GISEL: ; %bb.0: ; %entry
; GFX12-GISEL-NEXT: s_clause 0x1
; GFX12-GISEL-NEXT: s_load_b96 s[4:6], s[0:1], 0x24
; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x34
; GFX12-GISEL-NEXT: s_load_b64 s[2:3], s[0:1], 0x34
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX12-GISEL-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v1, s5
; GFX12-GISEL-NEXT: flat_atomic_cond_sub_u32 v2, v[0:1], v2 offset:16 th:TH_ATOMIC_RETURN
; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX12-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-GISEL-NEXT: flat_store_b32 v[0:1], v2
; GFX12-GISEL-NEXT: s_endpgm
Expand Down Expand Up @@ -140,14 +140,14 @@ entry:
define amdgpu_kernel void @global_atomic_cond_sub_rtn_u32(ptr addrspace(1) %addr, i32 %in, ptr addrspace(1) %use) {
; GFX12-SDAG-LABEL: global_atomic_cond_sub_rtn_u32:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_clause 0x1
; GFX12-SDAG-NEXT: s_load_b96 s[4:6], s[0:1], 0x24
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x34
; GFX12-SDAG-NEXT: s_load_b64 s[2:3], s[0:1], 0x34
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT: v_mov_b32_e32 v1, s6
; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s6
; GFX12-SDAG-NEXT: global_atomic_cond_sub_u32 v1, v0, v1, s[4:5] offset:16 th:TH_ATOMIC_RETURN
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-SDAG-NEXT: global_store_b32 v0, v1, s[2:3]
; GFX12-SDAG-NEXT: s_nop 0
; GFX12-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-SDAG-NEXT: s_endpgm
Expand All @@ -156,12 +156,12 @@ define amdgpu_kernel void @global_atomic_cond_sub_rtn_u32(ptr addrspace(1) %addr
; GFX12-GISEL: ; %bb.0: ; %entry
; GFX12-GISEL-NEXT: s_clause 0x1
; GFX12-GISEL-NEXT: s_load_b96 s[4:6], s[0:1], 0x24
; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x34
; GFX12-GISEL-NEXT: s_load_b64 s[2:3], s[0:1], 0x34
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s6
; GFX12-GISEL-NEXT: global_atomic_cond_sub_u32 v0, v1, v0, s[4:5] offset:16 th:TH_ATOMIC_RETURN
; GFX12-GISEL-NEXT: s_wait_loadcnt 0x0
; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[2:3]
; GFX12-GISEL-NEXT: s_nop 0
; GFX12-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX12-GISEL-NEXT: s_endpgm
Expand All @@ -175,21 +175,21 @@ entry:
define amdgpu_kernel void @ds_cond_sub_no_rtn_u32(ptr addrspace(3) %addr, i32 %in) {
; GFX12-SDAG-LABEL: ds_cond_sub_no_rtn_u32:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
; GFX12-SDAG-NEXT: s_load_b64 s[2:3], s[0:1], 0x24
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT: s_add_co_i32 s0, s0, -16
; GFX12-SDAG-NEXT: s_add_co_i32 s0, s2, -16
; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-SDAG-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v0, s0
; GFX12-SDAG-NEXT: v_dual_mov_b32 v1, s3 :: v_dual_mov_b32 v0, s0
; GFX12-SDAG-NEXT: ds_cond_sub_rtn_u32 v0, v0, v1
; GFX12-SDAG-NEXT: s_endpgm
;
; GFX12-GISEL-LABEL: ds_cond_sub_no_rtn_u32:
; GFX12-GISEL: ; %bb.0: ; %entry
; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
; GFX12-GISEL-NEXT: s_load_b64 s[2:3], s[0:1], 0x24
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: s_add_co_u32 s0, s0, -16
; GFX12-GISEL-NEXT: s_add_co_u32 s0, s2, -16
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v0, s0
; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, s3 :: v_dual_mov_b32 v0, s0
; GFX12-GISEL-NEXT: ds_cond_sub_rtn_u32 v0, v0, v1
; GFX12-GISEL-NEXT: s_endpgm
entry:
Expand All @@ -201,21 +201,21 @@ entry:
define amdgpu_kernel void @ds_cond_sub_no_rtn_u32_forced(ptr addrspace(3) %addr, i32 %in) "target-features"="+atomic-csub-no-rtn-insts" {
; GFX12-SDAG-LABEL: ds_cond_sub_no_rtn_u32_forced:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
; GFX12-SDAG-NEXT: s_load_b64 s[2:3], s[0:1], 0x24
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT: s_add_co_i32 s0, s0, -16
; GFX12-SDAG-NEXT: s_add_co_i32 s0, s2, -16
; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-SDAG-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v0, s0
; GFX12-SDAG-NEXT: v_dual_mov_b32 v1, s3 :: v_dual_mov_b32 v0, s0
; GFX12-SDAG-NEXT: ds_cond_sub_u32 v0, v1
; GFX12-SDAG-NEXT: s_endpgm
;
; GFX12-GISEL-LABEL: ds_cond_sub_no_rtn_u32_forced:
; GFX12-GISEL: ; %bb.0: ; %entry
; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
; GFX12-GISEL-NEXT: s_load_b64 s[2:3], s[0:1], 0x24
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: s_add_co_u32 s0, s0, -16
; GFX12-GISEL-NEXT: s_add_co_u32 s0, s2, -16
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v0, s0
; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, s3 :: v_dual_mov_b32 v0, s0
; GFX12-GISEL-NEXT: ds_cond_sub_u32 v0, v1
; GFX12-GISEL-NEXT: s_endpgm
entry:
Expand All @@ -227,22 +227,22 @@ entry:
define amdgpu_kernel void @ds_cond_sub_rtn_u32(ptr addrspace(3) %addr, i32 %in, ptr addrspace(3) %use) {
; GFX12-SDAG-LABEL: ds_cond_sub_rtn_u32:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_load_b96 s[0:2], s[0:1], 0x24
; GFX12-SDAG-NEXT: s_load_b96 s[4:6], s[0:1], 0x24
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; GFX12-SDAG-NEXT: ds_cond_sub_rtn_u32 v0, v0, v1 offset:16
; GFX12-SDAG-NEXT: v_mov_b32_e32 v1, s2
; GFX12-SDAG-NEXT: v_mov_b32_e32 v1, s6
; GFX12-SDAG-NEXT: s_wait_dscnt 0x0
; GFX12-SDAG-NEXT: ds_store_b32 v1, v0
; GFX12-SDAG-NEXT: s_endpgm
;
; GFX12-GISEL-LABEL: ds_cond_sub_rtn_u32:
; GFX12-GISEL: ; %bb.0: ; %entry
; GFX12-GISEL-NEXT: s_load_b96 s[0:2], s[0:1], 0x24
; GFX12-GISEL-NEXT: s_load_b96 s[4:6], s[0:1], 0x24
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0
; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s5 :: v_dual_mov_b32 v1, s4
; GFX12-GISEL-NEXT: ds_cond_sub_rtn_u32 v0, v1, v0 offset:16
; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, s6
; GFX12-GISEL-NEXT: s_wait_dscnt 0x0
; GFX12-GISEL-NEXT: ds_store_b32 v1, v0
; GFX12-GISEL-NEXT: s_endpgm
Expand Down
56 changes: 28 additions & 28 deletions llvm/test/CodeGen/AMDGPU/bfe-combine.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,36 +6,36 @@
define amdgpu_kernel void @bfe_combine8(ptr addrspace(1) nocapture %arg, i32 %x) {
; VI-LABEL: bfe_combine8:
; VI: ; %bb.0:
; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v0
; VI-NEXT: v_add_u32_e32 v0, vcc, s4, v0
; VI-NEXT: v_bfe_u32 v0, v0, 8, 8
; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v0
; VI-NEXT: v_mov_b32_e32 v1, s3
; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v0
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT: flat_load_dword v2, v[0:1]
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: v_mov_b32_e32 v0, s2
; VI-NEXT: v_mov_b32_e32 v1, s3
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
;
; VI-SDWA-LABEL: bfe_combine8:
; VI-SDWA: ; %bb.0:
; VI-SDWA-NEXT: s_load_dword s2, s[0:1], 0x2c
; VI-SDWA-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-SDWA-NEXT: s_load_dword s4, s[0:1], 0x2c
; VI-SDWA-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; VI-SDWA-NEXT: v_mov_b32_e32 v1, 2
; VI-SDWA-NEXT: s_waitcnt lgkmcnt(0)
; VI-SDWA-NEXT: v_add_u32_e32 v0, vcc, s2, v0
; VI-SDWA-NEXT: v_add_u32_e32 v0, vcc, s4, v0
; VI-SDWA-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; VI-SDWA-NEXT: v_mov_b32_e32 v1, s1
; VI-SDWA-NEXT: v_add_u32_e32 v0, vcc, s0, v0
; VI-SDWA-NEXT: v_mov_b32_e32 v1, s3
; VI-SDWA-NEXT: v_add_u32_e32 v0, vcc, s2, v0
; VI-SDWA-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-SDWA-NEXT: flat_load_dword v2, v[0:1]
; VI-SDWA-NEXT: v_mov_b32_e32 v0, s0
; VI-SDWA-NEXT: v_mov_b32_e32 v1, s1
; VI-SDWA-NEXT: v_mov_b32_e32 v0, s2
; VI-SDWA-NEXT: v_mov_b32_e32 v1, s3
; VI-SDWA-NEXT: s_waitcnt vmcnt(0)
; VI-SDWA-NEXT: flat_store_dword v[0:1], v2
; VI-SDWA-NEXT: s_endpgm
Expand Down Expand Up @@ -71,40 +71,40 @@ define amdgpu_kernel void @bfe_combine8(ptr addrspace(1) nocapture %arg, i32 %x)
define amdgpu_kernel void @bfe_combine16(ptr addrspace(1) nocapture %arg, i32 %x) {
; VI-LABEL: bfe_combine16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; VI-NEXT: v_mov_b32_e32 v1, 0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v0
; VI-NEXT: v_add_u32_e32 v0, vcc, s4, v0
; VI-NEXT: v_bfe_u32 v0, v0, 16, 16
; VI-NEXT: v_lshlrev_b32_e32 v0, 15, v0
; VI-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
; VI-NEXT: v_mov_b32_e32 v2, s1
; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v0
; VI-NEXT: v_mov_b32_e32 v2, s3
; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v0
; VI-NEXT: v_addc_u32_e32 v1, vcc, v2, v1, vcc
; VI-NEXT: flat_load_dword v2, v[0:1]
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: v_mov_b32_e32 v0, s2
; VI-NEXT: v_mov_b32_e32 v1, s3
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
;
; VI-SDWA-LABEL: bfe_combine16:
; VI-SDWA: ; %bb.0:
; VI-SDWA-NEXT: s_load_dword s2, s[0:1], 0x2c
; VI-SDWA-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-SDWA-NEXT: s_load_dword s4, s[0:1], 0x2c
; VI-SDWA-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; VI-SDWA-NEXT: v_mov_b32_e32 v1, 15
; VI-SDWA-NEXT: s_waitcnt lgkmcnt(0)
; VI-SDWA-NEXT: v_add_u32_e32 v0, vcc, s2, v0
; VI-SDWA-NEXT: v_add_u32_e32 v0, vcc, s4, v0
; VI-SDWA-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; VI-SDWA-NEXT: v_mov_b32_e32 v1, 0
; VI-SDWA-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
; VI-SDWA-NEXT: v_mov_b32_e32 v2, s1
; VI-SDWA-NEXT: v_add_u32_e32 v0, vcc, s0, v0
; VI-SDWA-NEXT: v_mov_b32_e32 v2, s3
; VI-SDWA-NEXT: v_add_u32_e32 v0, vcc, s2, v0
; VI-SDWA-NEXT: v_addc_u32_e32 v1, vcc, v2, v1, vcc
; VI-SDWA-NEXT: flat_load_dword v2, v[0:1]
; VI-SDWA-NEXT: v_mov_b32_e32 v0, s0
; VI-SDWA-NEXT: v_mov_b32_e32 v1, s1
; VI-SDWA-NEXT: v_mov_b32_e32 v0, s2
; VI-SDWA-NEXT: v_mov_b32_e32 v1, s3
; VI-SDWA-NEXT: s_waitcnt vmcnt(0)
; VI-SDWA-NEXT: flat_store_dword v[0:1], v2
; VI-SDWA-NEXT: s_endpgm
Expand Down
72 changes: 36 additions & 36 deletions llvm/test/CodeGen/AMDGPU/bfe-patterns.ll
Original file line number Diff line number Diff line change
Expand Up @@ -23,18 +23,18 @@ define amdgpu_kernel void @v_ubfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1
;
; VI-LABEL: v_ubfe_sub_i32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v1, s3
; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v2
; VI-NEXT: v_mov_b32_e32 v1, s7
; VI-NEXT: v_add_u32_e32 v0, vcc, s6, v2
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT: flat_load_dword v3, v[0:1] glc
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: flat_load_dword v4, v[0:1] glc
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v2
; VI-NEXT: v_mov_b32_e32 v1, s5
; VI-NEXT: v_add_u32_e32 v0, vcc, s4, v2
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT: v_bfe_u32 v2, v3, 0, v4
; VI-NEXT: flat_store_dword v[0:1], v2
Expand Down Expand Up @@ -78,18 +78,18 @@ define amdgpu_kernel void @v_ubfe_sub_multi_use_shl_i32(ptr addrspace(1) %out, p
;
; VI-LABEL: v_ubfe_sub_multi_use_shl_i32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v1, s3
; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v2
; VI-NEXT: v_mov_b32_e32 v1, s7
; VI-NEXT: v_add_u32_e32 v0, vcc, s6, v2
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT: flat_load_dword v3, v[0:1] glc
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: flat_load_dword v4, v[0:1] glc
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v2
; VI-NEXT: v_mov_b32_e32 v1, s5
; VI-NEXT: v_add_u32_e32 v0, vcc, s4, v2
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT: v_sub_u32_e32 v2, vcc, 32, v4
; VI-NEXT: v_lshlrev_b32_e32 v3, v2, v3
Expand Down Expand Up @@ -221,18 +221,18 @@ define amdgpu_kernel void @v_sbfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1
;
; VI-LABEL: v_sbfe_sub_i32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v1, s3
; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v2
; VI-NEXT: v_mov_b32_e32 v1, s7
; VI-NEXT: v_add_u32_e32 v0, vcc, s6, v2
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT: flat_load_dword v3, v[0:1] glc
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: flat_load_dword v4, v[0:1] glc
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v2
; VI-NEXT: v_mov_b32_e32 v1, s5
; VI-NEXT: v_add_u32_e32 v0, vcc, s4, v2
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT: v_bfe_i32 v2, v3, 0, v4
; VI-NEXT: flat_store_dword v[0:1], v2
Expand Down Expand Up @@ -276,18 +276,18 @@ define amdgpu_kernel void @v_sbfe_sub_multi_use_shl_i32(ptr addrspace(1) %out, p
;
; VI-LABEL: v_sbfe_sub_multi_use_shl_i32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v1, s3
; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v2
; VI-NEXT: v_mov_b32_e32 v1, s7
; VI-NEXT: v_add_u32_e32 v0, vcc, s6, v2
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT: flat_load_dword v3, v[0:1] glc
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: flat_load_dword v4, v[0:1] glc
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v2
; VI-NEXT: v_mov_b32_e32 v1, s5
; VI-NEXT: v_add_u32_e32 v0, vcc, s4, v2
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT: v_sub_u32_e32 v2, vcc, 32, v4
; VI-NEXT: v_lshlrev_b32_e32 v3, v2, v3
Expand Down Expand Up @@ -418,14 +418,14 @@ define amdgpu_kernel void @s_sbfe_or_shl_shl_uniform_i32(ptr addrspace(1) %out,
; VI-LABEL: s_sbfe_or_shl_shl_uniform_i32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_load_dword s2, s[6:7], 0x0
; VI-NEXT: s_load_dword s0, s[0:1], 0x0
; VI-NEXT: s_load_dword s0, s[6:7], 0x0
; VI-NEXT: s_load_dword s1, s[2:3], 0x0
; VI-NEXT: v_mov_b32_e32 v0, s4
; VI-NEXT: v_mov_b32_e32 v1, s5
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_or_b32 s0, s2, s0
; VI-NEXT: s_or_b32 s0, s0, s1
; VI-NEXT: s_bfe_i32 s0, s0, 0xf0000
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: flat_store_dword v[0:1], v2
Expand Down Expand Up @@ -463,16 +463,16 @@ define amdgpu_kernel void @s_sbfe_or_shl_shl_nonuniform_i32(ptr addrspace(1) %ou
; VI-LABEL: s_sbfe_or_shl_shl_nonuniform_i32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_load_dword s2, s[6:7], 0x0
; VI-NEXT: s_load_dword s0, s[0:1], 0x0
; VI-NEXT: s_load_dword s0, s[6:7], 0x0
; VI-NEXT: s_load_dword s1, s[2:3], 0x0
; VI-NEXT: v_mov_b32_e32 v0, s4
; VI-NEXT: v_mov_b32_e32 v1, s5
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_lshl_b32 s1, s2, 17
; VI-NEXT: s_lshl_b32 s0, s0, 19
; VI-NEXT: s_or_b32 s0, s1, s0
; VI-NEXT: s_lshl_b32 s0, s0, 17
; VI-NEXT: s_lshl_b32 s1, s1, 19
; VI-NEXT: s_or_b32 s0, s0, s1
; VI-NEXT: s_ashr_i32 s0, s0, 17
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: flat_store_dword v[0:1], v2
Expand Down Expand Up @@ -510,16 +510,16 @@ define amdgpu_kernel void @s_sbfe_or_shl_shl_toosmall_i32(ptr addrspace(1) %out,
; VI-LABEL: s_sbfe_or_shl_shl_toosmall_i32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_load_dword s2, s[6:7], 0x0
; VI-NEXT: s_load_dword s0, s[0:1], 0x0
; VI-NEXT: s_load_dword s0, s[6:7], 0x0
; VI-NEXT: s_load_dword s1, s[2:3], 0x0
; VI-NEXT: v_mov_b32_e32 v0, s4
; VI-NEXT: v_mov_b32_e32 v1, s5
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_lshl_b32 s1, s2, 17
; VI-NEXT: s_lshl_b32 s0, s0, 16
; VI-NEXT: s_or_b32 s0, s1, s0
; VI-NEXT: s_lshl_b32 s0, s0, 17
; VI-NEXT: s_lshl_b32 s1, s1, 16
; VI-NEXT: s_or_b32 s0, s0, s1
; VI-NEXT: s_ashr_i32 s0, s0, 17
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: flat_store_dword v[0:1], v2
Expand Down
136 changes: 68 additions & 68 deletions llvm/test/CodeGen/AMDGPU/bfi_int.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1426,11 +1426,11 @@ define amdgpu_kernel void @s_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
; GFX8-LABEL: s_bitselect_i64_pat_0:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
; GFX8-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_and_b64 s[2:3], s[4:5], s[6:7]
; GFX8-NEXT: s_andn2_b64 s[0:1], s[0:1], s[4:5]
; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
; GFX8-NEXT: s_and_b64 s[0:1], s[4:5], s[6:7]
; GFX8-NEXT: s_andn2_b64 s[2:3], s[2:3], s[4:5]
; GFX8-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
; GFX8-NEXT: s_add_u32 s0, s0, 10
; GFX8-NEXT: s_addc_u32 s1, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v0, s0
Expand All @@ -1442,11 +1442,11 @@ define amdgpu_kernel void @s_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
; GFX10: ; %bb.0:
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_and_b64 s[2:3], s[4:5], s[6:7]
; GFX10-NEXT: s_andn2_b64 s[0:1], s[0:1], s[4:5]
; GFX10-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
; GFX10-NEXT: s_and_b64 s[0:1], s[4:5], s[6:7]
; GFX10-NEXT: s_andn2_b64 s[2:3], s[2:3], s[4:5]
; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
; GFX10-NEXT: s_add_u32 s0, s0, 10
; GFX10-NEXT: s_addc_u32 s1, s1, 0
; GFX10-NEXT: v_mov_b32_e32 v0, s0
Expand All @@ -1457,11 +1457,11 @@ define amdgpu_kernel void @s_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
; GFX8-GISEL-LABEL: s_bitselect_i64_pat_0:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX8-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
; GFX8-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-GISEL-NEXT: s_and_b64 s[2:3], s[4:5], s[6:7]
; GFX8-GISEL-NEXT: s_andn2_b64 s[0:1], s[0:1], s[4:5]
; GFX8-GISEL-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
; GFX8-GISEL-NEXT: s_and_b64 s[0:1], s[4:5], s[6:7]
; GFX8-GISEL-NEXT: s_andn2_b64 s[2:3], s[2:3], s[4:5]
; GFX8-GISEL-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
; GFX8-GISEL-NEXT: s_add_u32 s0, s0, 10
; GFX8-GISEL-NEXT: s_addc_u32 s1, s1, 0
; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, s0
Expand All @@ -1473,11 +1473,11 @@ define amdgpu_kernel void @s_bitselect_i64_pat_0(i64 %a, i64 %b, i64 %mask) {
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_clause 0x1
; GFX10-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
; GFX10-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-GISEL-NEXT: s_and_b64 s[2:3], s[4:5], s[6:7]
; GFX10-GISEL-NEXT: s_andn2_b64 s[0:1], s[0:1], s[4:5]
; GFX10-GISEL-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
; GFX10-GISEL-NEXT: s_and_b64 s[0:1], s[4:5], s[6:7]
; GFX10-GISEL-NEXT: s_andn2_b64 s[2:3], s[2:3], s[4:5]
; GFX10-GISEL-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
; GFX10-GISEL-NEXT: s_add_u32 s0, s0, 10
; GFX10-GISEL-NEXT: s_addc_u32 s1, s1, 0
; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0
Expand Down Expand Up @@ -1514,11 +1514,11 @@ define amdgpu_kernel void @s_bitselect_i64_pat_1(i64 %a, i64 %b, i64 %mask) {
; GFX8-LABEL: s_bitselect_i64_pat_1:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
; GFX8-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_xor_b64 s[2:3], s[4:5], s[0:1]
; GFX8-NEXT: s_and_b64 s[2:3], s[2:3], s[6:7]
; GFX8-NEXT: s_xor_b64 s[0:1], s[2:3], s[0:1]
; GFX8-NEXT: s_xor_b64 s[0:1], s[4:5], s[2:3]
; GFX8-NEXT: s_and_b64 s[0:1], s[0:1], s[6:7]
; GFX8-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
; GFX8-NEXT: s_add_u32 s0, s0, 10
; GFX8-NEXT: s_addc_u32 s1, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v0, s0
Expand All @@ -1530,11 +1530,11 @@ define amdgpu_kernel void @s_bitselect_i64_pat_1(i64 %a, i64 %b, i64 %mask) {
; GFX10: ; %bb.0:
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_xor_b64 s[2:3], s[4:5], s[0:1]
; GFX10-NEXT: s_and_b64 s[2:3], s[2:3], s[6:7]
; GFX10-NEXT: s_xor_b64 s[0:1], s[2:3], s[0:1]
; GFX10-NEXT: s_xor_b64 s[0:1], s[4:5], s[2:3]
; GFX10-NEXT: s_and_b64 s[0:1], s[0:1], s[6:7]
; GFX10-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
; GFX10-NEXT: s_add_u32 s0, s0, 10
; GFX10-NEXT: s_addc_u32 s1, s1, 0
; GFX10-NEXT: v_mov_b32_e32 v0, s0
Expand All @@ -1545,11 +1545,11 @@ define amdgpu_kernel void @s_bitselect_i64_pat_1(i64 %a, i64 %b, i64 %mask) {
; GFX8-GISEL-LABEL: s_bitselect_i64_pat_1:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX8-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
; GFX8-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-GISEL-NEXT: s_xor_b64 s[2:3], s[4:5], s[0:1]
; GFX8-GISEL-NEXT: s_and_b64 s[2:3], s[2:3], s[6:7]
; GFX8-GISEL-NEXT: s_xor_b64 s[0:1], s[2:3], s[0:1]
; GFX8-GISEL-NEXT: s_xor_b64 s[0:1], s[4:5], s[2:3]
; GFX8-GISEL-NEXT: s_and_b64 s[0:1], s[0:1], s[6:7]
; GFX8-GISEL-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
; GFX8-GISEL-NEXT: s_add_u32 s0, s0, 10
; GFX8-GISEL-NEXT: s_addc_u32 s1, s1, 0
; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, s0
Expand All @@ -1561,11 +1561,11 @@ define amdgpu_kernel void @s_bitselect_i64_pat_1(i64 %a, i64 %b, i64 %mask) {
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_clause 0x1
; GFX10-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
; GFX10-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-GISEL-NEXT: s_xor_b64 s[2:3], s[4:5], s[0:1]
; GFX10-GISEL-NEXT: s_and_b64 s[2:3], s[2:3], s[6:7]
; GFX10-GISEL-NEXT: s_xor_b64 s[0:1], s[2:3], s[0:1]
; GFX10-GISEL-NEXT: s_xor_b64 s[0:1], s[4:5], s[2:3]
; GFX10-GISEL-NEXT: s_and_b64 s[0:1], s[0:1], s[6:7]
; GFX10-GISEL-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
; GFX10-GISEL-NEXT: s_add_u32 s0, s0, 10
; GFX10-GISEL-NEXT: s_addc_u32 s1, s1, 0
; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0
Expand Down Expand Up @@ -1602,11 +1602,11 @@ define amdgpu_kernel void @s_bitselect_i64_pat_2(i64 %a, i64 %b, i64 %mask) {
; GFX8-LABEL: s_bitselect_i64_pat_2:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
; GFX8-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_xor_b64 s[2:3], s[4:5], s[0:1]
; GFX8-NEXT: s_and_b64 s[2:3], s[2:3], s[6:7]
; GFX8-NEXT: s_xor_b64 s[0:1], s[2:3], s[0:1]
; GFX8-NEXT: s_xor_b64 s[0:1], s[4:5], s[2:3]
; GFX8-NEXT: s_and_b64 s[0:1], s[0:1], s[6:7]
; GFX8-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
; GFX8-NEXT: s_add_u32 s0, s0, 10
; GFX8-NEXT: s_addc_u32 s1, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v0, s0
Expand All @@ -1618,11 +1618,11 @@ define amdgpu_kernel void @s_bitselect_i64_pat_2(i64 %a, i64 %b, i64 %mask) {
; GFX10: ; %bb.0:
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_xor_b64 s[2:3], s[4:5], s[0:1]
; GFX10-NEXT: s_and_b64 s[2:3], s[2:3], s[6:7]
; GFX10-NEXT: s_xor_b64 s[0:1], s[2:3], s[0:1]
; GFX10-NEXT: s_xor_b64 s[0:1], s[4:5], s[2:3]
; GFX10-NEXT: s_and_b64 s[0:1], s[0:1], s[6:7]
; GFX10-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
; GFX10-NEXT: s_add_u32 s0, s0, 10
; GFX10-NEXT: s_addc_u32 s1, s1, 0
; GFX10-NEXT: v_mov_b32_e32 v0, s0
Expand All @@ -1633,11 +1633,11 @@ define amdgpu_kernel void @s_bitselect_i64_pat_2(i64 %a, i64 %b, i64 %mask) {
; GFX8-GISEL-LABEL: s_bitselect_i64_pat_2:
; GFX8-GISEL: ; %bb.0:
; GFX8-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX8-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
; GFX8-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-GISEL-NEXT: s_xor_b64 s[2:3], s[4:5], s[0:1]
; GFX8-GISEL-NEXT: s_and_b64 s[2:3], s[2:3], s[6:7]
; GFX8-GISEL-NEXT: s_xor_b64 s[0:1], s[2:3], s[0:1]
; GFX8-GISEL-NEXT: s_xor_b64 s[0:1], s[4:5], s[2:3]
; GFX8-GISEL-NEXT: s_and_b64 s[0:1], s[0:1], s[6:7]
; GFX8-GISEL-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
; GFX8-GISEL-NEXT: s_add_u32 s0, s0, 10
; GFX8-GISEL-NEXT: s_addc_u32 s1, s1, 0
; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, s0
Expand All @@ -1649,11 +1649,11 @@ define amdgpu_kernel void @s_bitselect_i64_pat_2(i64 %a, i64 %b, i64 %mask) {
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_clause 0x1
; GFX10-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
; GFX10-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-GISEL-NEXT: s_xor_b64 s[2:3], s[4:5], s[0:1]
; GFX10-GISEL-NEXT: s_and_b64 s[2:3], s[2:3], s[6:7]
; GFX10-GISEL-NEXT: s_xor_b64 s[0:1], s[2:3], s[0:1]
; GFX10-GISEL-NEXT: s_xor_b64 s[0:1], s[4:5], s[2:3]
; GFX10-GISEL-NEXT: s_and_b64 s[0:1], s[0:1], s[6:7]
; GFX10-GISEL-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
; GFX10-GISEL-NEXT: s_add_u32 s0, s0, 10
; GFX10-GISEL-NEXT: s_addc_u32 s1, s1, 0
; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0
Expand Down Expand Up @@ -1691,12 +1691,12 @@ define amdgpu_kernel void @s_bfi_sha256_ma_i64(i64 %x, i64 %y, i64 %z) {
; GFX8-LABEL: s_bfi_sha256_ma_i64:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
; GFX8-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_and_b64 s[2:3], s[4:5], s[0:1]
; GFX8-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1]
; GFX8-NEXT: s_and_b64 s[0:1], s[6:7], s[0:1]
; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
; GFX8-NEXT: s_and_b64 s[0:1], s[4:5], s[2:3]
; GFX8-NEXT: s_or_b64 s[2:3], s[4:5], s[2:3]
; GFX8-NEXT: s_and_b64 s[2:3], s[6:7], s[2:3]
; GFX8-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
; GFX8-NEXT: s_add_u32 s0, s0, 10
; GFX8-NEXT: s_addc_u32 s1, s1, 0
; GFX8-NEXT: v_mov_b32_e32 v0, s0
Expand All @@ -1708,12 +1708,12 @@ define amdgpu_kernel void @s_bfi_sha256_ma_i64(i64 %x, i64 %y, i64 %z) {
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_or_b64 s[2:3], s[4:5], s[0:1]
; GFX10-NEXT: s_and_b64 s[0:1], s[4:5], s[0:1]
; GFX10-NEXT: s_and_b64 s[2:3], s[6:7], s[2:3]
; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
; GFX10-NEXT: s_or_b64 s[0:1], s[4:5], s[2:3]
; GFX10-NEXT: s_and_b64 s[2:3], s[4:5], s[2:3]
; GFX10-NEXT: s_and_b64 s[0:1], s[6:7], s[0:1]
; GFX10-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
; GFX10-NEXT: s_add_u32 s0, s0, 10
; GFX10-NEXT: s_addc_u32 s1, s1, 0
; GFX10-NEXT: v_mov_b32_e32 v0, s0
Expand All @@ -1724,12 +1724,12 @@ define amdgpu_kernel void @s_bfi_sha256_ma_i64(i64 %x, i64 %y, i64 %z) {
; GFX8-GISEL-LABEL: s_bfi_sha256_ma_i64:
; GFX8-GISEL: ; %bb.0: ; %entry
; GFX8-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX8-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
; GFX8-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-GISEL-NEXT: s_and_b64 s[2:3], s[4:5], s[0:1]
; GFX8-GISEL-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1]
; GFX8-GISEL-NEXT: s_and_b64 s[0:1], s[6:7], s[0:1]
; GFX8-GISEL-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
; GFX8-GISEL-NEXT: s_and_b64 s[0:1], s[4:5], s[2:3]
; GFX8-GISEL-NEXT: s_or_b64 s[2:3], s[4:5], s[2:3]
; GFX8-GISEL-NEXT: s_and_b64 s[2:3], s[6:7], s[2:3]
; GFX8-GISEL-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
; GFX8-GISEL-NEXT: s_add_u32 s0, s0, 10
; GFX8-GISEL-NEXT: s_addc_u32 s1, s1, 0
; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, s0
Expand All @@ -1741,12 +1741,12 @@ define amdgpu_kernel void @s_bfi_sha256_ma_i64(i64 %x, i64 %y, i64 %z) {
; GFX10-GISEL: ; %bb.0: ; %entry
; GFX10-GISEL-NEXT: s_clause 0x1
; GFX10-GISEL-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
; GFX10-GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-GISEL-NEXT: s_or_b64 s[2:3], s[4:5], s[0:1]
; GFX10-GISEL-NEXT: s_and_b64 s[0:1], s[4:5], s[0:1]
; GFX10-GISEL-NEXT: s_and_b64 s[2:3], s[6:7], s[2:3]
; GFX10-GISEL-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
; GFX10-GISEL-NEXT: s_or_b64 s[0:1], s[4:5], s[2:3]
; GFX10-GISEL-NEXT: s_and_b64 s[2:3], s[4:5], s[2:3]
; GFX10-GISEL-NEXT: s_and_b64 s[0:1], s[6:7], s[0:1]
; GFX10-GISEL-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1]
; GFX10-GISEL-NEXT: s_add_u32 s0, s0, 10
; GFX10-GISEL-NEXT: s_addc_u32 s1, s1, 0
; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/CodeGen/AMDGPU/bfm.ll
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,13 @@ define amdgpu_kernel void @s_bfm_pattern_simple(ptr addrspace(1) %out, i32 %x) #
;
; VI-LABEL: s_bfm_pattern_simple:
; VI: ; %bb.0:
; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_bfm_b32 s2, s2, 0
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: v_mov_b32_e32 v2, s2
; VI-NEXT: s_bfm_b32 s0, s4, 0
; VI-NEXT: v_mov_b32_e32 v0, s2
; VI-NEXT: v_mov_b32_e32 v1, s3
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
%a = shl i32 1, %x
Expand Down
140 changes: 70 additions & 70 deletions llvm/test/CodeGen/AMDGPU/bitreverse.ll
Original file line number Diff line number Diff line change
Expand Up @@ -34,59 +34,59 @@ define amdgpu_kernel void @s_brev_i16(ptr addrspace(1) noalias %out, i16 %val) #
;
; FLAT-LABEL: s_brev_i16:
; FLAT: ; %bb.0:
; FLAT-NEXT: s_load_dword s4, s[0:1], 0x2c
; FLAT-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; FLAT-NEXT: s_mov_b32 s3, 0xf000
; FLAT-NEXT: s_mov_b32 s2, -1
; FLAT-NEXT: s_load_dword s2, s[0:1], 0x2c
; FLAT-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
; FLAT-NEXT: s_mov_b32 s7, 0xf000
; FLAT-NEXT: s_mov_b32 s6, -1
; FLAT-NEXT: s_waitcnt lgkmcnt(0)
; FLAT-NEXT: s_brev_b32 s4, s4
; FLAT-NEXT: s_lshr_b32 s4, s4, 16
; FLAT-NEXT: v_mov_b32_e32 v0, s4
; FLAT-NEXT: buffer_store_short v0, off, s[0:3], 0
; FLAT-NEXT: s_brev_b32 s0, s2
; FLAT-NEXT: s_lshr_b32 s0, s0, 16
; FLAT-NEXT: v_mov_b32_e32 v0, s0
; FLAT-NEXT: buffer_store_short v0, off, s[4:7], 0
; FLAT-NEXT: s_endpgm
;
; GISEL-LABEL: s_brev_i16:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_load_dword s2, s[0:1], 0x2c
; GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GISEL-NEXT: s_load_dword s4, s[0:1], 0x2c
; GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GISEL-NEXT: s_and_b32 s2, s2, 0xffff
; GISEL-NEXT: s_brev_b32 s2, s2
; GISEL-NEXT: s_lshr_b32 s2, s2, 16
; GISEL-NEXT: v_mov_b32_e32 v0, s0
; GISEL-NEXT: v_mov_b32_e32 v2, s2
; GISEL-NEXT: v_mov_b32_e32 v1, s1
; GISEL-NEXT: s_and_b32 s0, s4, 0xffff
; GISEL-NEXT: s_brev_b32 s0, s0
; GISEL-NEXT: s_lshr_b32 s0, s0, 16
; GISEL-NEXT: v_mov_b32_e32 v0, s2
; GISEL-NEXT: v_mov_b32_e32 v2, s0
; GISEL-NEXT: v_mov_b32_e32 v1, s3
; GISEL-NEXT: flat_store_short v[0:1], v2
; GISEL-NEXT: s_endpgm
;
; GFX11-FLAT-LABEL: s_brev_i16:
; GFX11-FLAT: ; %bb.0:
; GFX11-FLAT-NEXT: s_clause 0x1
; GFX11-FLAT-NEXT: s_load_b32 s2, s[0:1], 0x2c
; GFX11-FLAT-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-FLAT-NEXT: s_load_b32 s4, s[0:1], 0x2c
; GFX11-FLAT-NEXT: s_load_b64 s[2:3], s[0:1], 0x24
; GFX11-FLAT-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-FLAT-NEXT: s_brev_b32 s2, s2
; GFX11-FLAT-NEXT: s_brev_b32 s0, s4
; GFX11-FLAT-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-FLAT-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-FLAT-NEXT: global_store_d16_hi_b16 v0, v1, s[0:1]
; GFX11-FLAT-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
; GFX11-FLAT-NEXT: global_store_d16_hi_b16 v0, v1, s[2:3]
; GFX11-FLAT-NEXT: s_nop 0
; GFX11-FLAT-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-FLAT-NEXT: s_endpgm
;
; GFX11-GISEL-LABEL: s_brev_i16:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_clause 0x1
; GFX11-GISEL-NEXT: s_load_b32 s2, s[0:1], 0x2c
; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-GISEL-NEXT: s_load_b32 s4, s[0:1], 0x2c
; GFX11-GISEL-NEXT: s_load_b64 s[2:3], s[0:1], 0x24
; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: s_and_b32 s2, s2, 0xffff
; GFX11-GISEL-NEXT: s_and_b32 s0, s4, 0xffff
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-GISEL-NEXT: s_brev_b32 s2, s2
; GFX11-GISEL-NEXT: s_lshr_b32 s2, s2, 16
; GFX11-GISEL-NEXT: s_brev_b32 s0, s0
; GFX11-GISEL-NEXT: s_lshr_b32 s0, s0, 16
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX11-GISEL-NEXT: global_store_b16 v1, v0, s[0:1]
; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX11-GISEL-NEXT: global_store_b16 v1, v0, s[2:3]
; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
Expand Down Expand Up @@ -199,55 +199,55 @@ define amdgpu_kernel void @s_brev_i32(ptr addrspace(1) noalias %out, i32 %val) #
;
; FLAT-LABEL: s_brev_i32:
; FLAT: ; %bb.0:
; FLAT-NEXT: s_load_dword s4, s[0:1], 0x2c
; FLAT-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; FLAT-NEXT: s_mov_b32 s3, 0xf000
; FLAT-NEXT: s_mov_b32 s2, -1
; FLAT-NEXT: s_load_dword s2, s[0:1], 0x2c
; FLAT-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
; FLAT-NEXT: s_mov_b32 s7, 0xf000
; FLAT-NEXT: s_mov_b32 s6, -1
; FLAT-NEXT: s_waitcnt lgkmcnt(0)
; FLAT-NEXT: s_brev_b32 s4, s4
; FLAT-NEXT: v_mov_b32_e32 v0, s4
; FLAT-NEXT: buffer_store_dword v0, off, s[0:3], 0
; FLAT-NEXT: s_brev_b32 s0, s2
; FLAT-NEXT: v_mov_b32_e32 v0, s0
; FLAT-NEXT: buffer_store_dword v0, off, s[4:7], 0
; FLAT-NEXT: s_endpgm
;
; GISEL-LABEL: s_brev_i32:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_load_dword s2, s[0:1], 0x2c
; GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GISEL-NEXT: s_load_dword s4, s[0:1], 0x2c
; GISEL-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GISEL-NEXT: s_brev_b32 s2, s2
; GISEL-NEXT: v_mov_b32_e32 v0, s0
; GISEL-NEXT: v_mov_b32_e32 v2, s2
; GISEL-NEXT: v_mov_b32_e32 v1, s1
; GISEL-NEXT: s_brev_b32 s0, s4
; GISEL-NEXT: v_mov_b32_e32 v0, s2
; GISEL-NEXT: v_mov_b32_e32 v2, s0
; GISEL-NEXT: v_mov_b32_e32 v1, s3
; GISEL-NEXT: flat_store_dword v[0:1], v2
; GISEL-NEXT: s_endpgm
;
; GFX11-FLAT-LABEL: s_brev_i32:
; GFX11-FLAT: ; %bb.0:
; GFX11-FLAT-NEXT: s_clause 0x1
; GFX11-FLAT-NEXT: s_load_b32 s2, s[0:1], 0x2c
; GFX11-FLAT-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-FLAT-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-FLAT-NEXT: s_load_b64 s[4:5], s[0:1], 0x24
; GFX11-FLAT-NEXT: s_mov_b32 s7, 0x31016000
; GFX11-FLAT-NEXT: s_mov_b32 s6, -1
; GFX11-FLAT-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-FLAT-NEXT: s_brev_b32 s2, s2
; GFX11-FLAT-NEXT: s_brev_b32 s0, s2
; GFX11-FLAT-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-FLAT-NEXT: v_mov_b32_e32 v0, s2
; GFX11-FLAT-NEXT: s_mov_b32 s2, -1
; GFX11-FLAT-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-FLAT-NEXT: v_mov_b32_e32 v0, s0
; GFX11-FLAT-NEXT: buffer_store_b32 v0, off, s[4:7], 0
; GFX11-FLAT-NEXT: s_nop 0
; GFX11-FLAT-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-FLAT-NEXT: s_endpgm
;
; GFX11-GISEL-LABEL: s_brev_i32:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_clause 0x1
; GFX11-GISEL-NEXT: s_load_b32 s2, s[0:1], 0x2c
; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-GISEL-NEXT: s_load_b32 s4, s[0:1], 0x2c
; GFX11-GISEL-NEXT: s_load_b64 s[2:3], s[0:1], 0x24
; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: s_brev_b32 s2, s2
; GFX11-GISEL-NEXT: s_brev_b32 s0, s4
; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX11-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX11-GISEL-NEXT: global_store_b32 v1, v0, s[2:3]
; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
Expand Down Expand Up @@ -702,17 +702,17 @@ define amdgpu_kernel void @s_brev_v2i64(ptr addrspace(1) noalias %out, <2 x i64>
; FLAT-LABEL: s_brev_v2i64:
; FLAT: ; %bb.0:
; FLAT-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
; FLAT-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; FLAT-NEXT: s_mov_b32 s3, 0xf000
; FLAT-NEXT: s_mov_b32 s2, -1
; FLAT-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24
; FLAT-NEXT: s_mov_b32 s11, 0xf000
; FLAT-NEXT: s_mov_b32 s10, -1
; FLAT-NEXT: s_waitcnt lgkmcnt(0)
; FLAT-NEXT: s_brev_b64 s[6:7], s[6:7]
; FLAT-NEXT: s_brev_b64 s[4:5], s[4:5]
; FLAT-NEXT: v_mov_b32_e32 v0, s4
; FLAT-NEXT: v_mov_b32_e32 v1, s5
; FLAT-NEXT: v_mov_b32_e32 v2, s6
; FLAT-NEXT: v_mov_b32_e32 v3, s7
; FLAT-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; FLAT-NEXT: s_brev_b64 s[0:1], s[6:7]
; FLAT-NEXT: s_brev_b64 s[2:3], s[4:5]
; FLAT-NEXT: v_mov_b32_e32 v0, s2
; FLAT-NEXT: v_mov_b32_e32 v1, s3
; FLAT-NEXT: v_mov_b32_e32 v2, s0
; FLAT-NEXT: v_mov_b32_e32 v3, s1
; FLAT-NEXT: buffer_store_dwordx4 v[0:3], off, s[8:11], 0
; FLAT-NEXT: s_endpgm
;
; GISEL-LABEL: s_brev_v2i64:
Expand All @@ -735,15 +735,15 @@ define amdgpu_kernel void @s_brev_v2i64(ptr addrspace(1) noalias %out, <2 x i64>
; GFX11-FLAT: ; %bb.0:
; GFX11-FLAT-NEXT: s_clause 0x1
; GFX11-FLAT-NEXT: s_load_b128 s[4:7], s[0:1], 0x34
; GFX11-FLAT-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-FLAT-NEXT: s_load_b64 s[8:9], s[0:1], 0x24
; GFX11-FLAT-NEXT: s_mov_b32 s11, 0x31016000
; GFX11-FLAT-NEXT: s_mov_b32 s10, -1
; GFX11-FLAT-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-FLAT-NEXT: s_brev_b64 s[2:3], s[4:5]
; GFX11-FLAT-NEXT: s_brev_b64 s[4:5], s[6:7]
; GFX11-FLAT-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-FLAT-NEXT: v_dual_mov_b32 v2, s4 :: v_dual_mov_b32 v3, s5
; GFX11-FLAT-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-FLAT-NEXT: s_mov_b32 s2, -1
; GFX11-FLAT-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-FLAT-NEXT: s_brev_b64 s[0:1], s[4:5]
; GFX11-FLAT-NEXT: s_brev_b64 s[2:3], s[6:7]
; GFX11-FLAT-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-FLAT-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX11-FLAT-NEXT: buffer_store_b128 v[0:3], off, s[8:11], 0
; GFX11-FLAT-NEXT: s_nop 0
; GFX11-FLAT-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-FLAT-NEXT: s_endpgm
Expand Down
74 changes: 37 additions & 37 deletions llvm/test/CodeGen/AMDGPU/br_cc.f16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -137,42 +137,42 @@ define amdgpu_kernel void @br_cc_f16_imm_a(
;
; VI-LABEL: br_cc_f16_imm_a:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_mov_b32 s4, s2
; VI-NEXT: s_mov_b32 s5, s3
; VI-NEXT: buffer_load_ushort v0, off, s[4:7], 0
; VI-NEXT: s_mov_b32 s2, s6
; VI-NEXT: s_mov_b32 s3, s7
; VI-NEXT: s_mov_b32 s0, s6
; VI-NEXT: s_mov_b32 s1, s7
; VI-NEXT: buffer_load_ushort v0, off, s[0:3], 0
; VI-NEXT: s_mov_b32 s6, s2
; VI-NEXT: s_mov_b32 s7, s3
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: v_cmp_nlt_f16_e32 vcc, 0.5, v0
; VI-NEXT: s_cbranch_vccnz .LBB1_2
; VI-NEXT: ; %bb.1: ; %one
; VI-NEXT: v_mov_b32_e32 v0, 0x3800
; VI-NEXT: .LBB1_2: ; %two
; VI-NEXT: buffer_store_short v0, off, s[0:3], 0
; VI-NEXT: buffer_store_short v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
;
; GFX11-LABEL: br_cc_f16_imm_a:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
; GFX11-NEXT: s_mov_b32 s7, 0x31016000
; GFX11-NEXT: s_mov_b32 s6, -1
; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s4, s2
; GFX11-NEXT: s_mov_b32 s5, s3
; GFX11-NEXT: buffer_load_u16 v0, off, s[4:7], 0
; GFX11-NEXT: s_mov_b32 s0, s6
; GFX11-NEXT: s_mov_b32 s1, s7
; GFX11-NEXT: buffer_load_u16 v0, off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_cmp_nlt_f16_e32 vcc_lo, 0.5, v0
; GFX11-NEXT: s_cbranch_vccnz .LBB1_2
; GFX11-NEXT: ; %bb.1: ; %one
; GFX11-NEXT: v_mov_b32_e32 v0, 0x3800
; GFX11-NEXT: .LBB1_2: ; %two
; GFX11-NEXT: s_mov_b32 s2, s6
; GFX11-NEXT: s_mov_b32 s3, s7
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
; GFX11-NEXT: s_mov_b32 s6, s2
; GFX11-NEXT: s_mov_b32 s7, s3
; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
Expand Down Expand Up @@ -221,44 +221,44 @@ define amdgpu_kernel void @br_cc_f16_imm_b(
;
; VI-LABEL: br_cc_f16_imm_b:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_mov_b32 s4, s2
; VI-NEXT: s_mov_b32 s5, s3
; VI-NEXT: buffer_load_ushort v0, off, s[4:7], 0
; VI-NEXT: s_mov_b32 s2, s6
; VI-NEXT: s_mov_b32 s3, s7
; VI-NEXT: s_mov_b32 s0, s6
; VI-NEXT: s_mov_b32 s1, s7
; VI-NEXT: buffer_load_ushort v0, off, s[0:3], 0
; VI-NEXT: s_mov_b32 s6, s2
; VI-NEXT: s_mov_b32 s7, s3
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: v_cmp_ngt_f16_e32 vcc, 0.5, v0
; VI-NEXT: s_cbranch_vccnz .LBB2_2
; VI-NEXT: ; %bb.1: ; %one
; VI-NEXT: buffer_store_short v0, off, s[0:3], 0
; VI-NEXT: buffer_store_short v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
; VI-NEXT: .LBB2_2: ; %two
; VI-NEXT: v_mov_b32_e32 v0, 0x3800
; VI-NEXT: buffer_store_short v0, off, s[0:3], 0
; VI-NEXT: buffer_store_short v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
;
; GFX11-LABEL: br_cc_f16_imm_b:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
; GFX11-NEXT: s_mov_b32 s7, 0x31016000
; GFX11-NEXT: s_mov_b32 s6, -1
; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_mov_b32 s4, s2
; GFX11-NEXT: s_mov_b32 s5, s3
; GFX11-NEXT: buffer_load_u16 v0, off, s[4:7], 0
; GFX11-NEXT: s_mov_b32 s0, s6
; GFX11-NEXT: s_mov_b32 s1, s7
; GFX11-NEXT: buffer_load_u16 v0, off, s[0:3], 0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_cmp_ngt_f16_e32 vcc_lo, 0.5, v0
; GFX11-NEXT: s_cbranch_vccz .LBB2_2
; GFX11-NEXT: ; %bb.1: ; %two
; GFX11-NEXT: v_mov_b32_e32 v0, 0x3800
; GFX11-NEXT: .LBB2_2: ; %one
; GFX11-NEXT: s_mov_b32 s2, s6
; GFX11-NEXT: s_mov_b32 s3, s7
; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0
; GFX11-NEXT: s_mov_b32 s6, s2
; GFX11-NEXT: s_mov_b32 s7, s3
; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ define amdgpu_kernel void @f1(ptr addrspace(1) %arg, ptr addrspace(1) %arg1, i64
; GFX90A-NEXT: renamable $sgpr10_sgpr11 = COPY $sgpr8_sgpr9
; GFX90A-NEXT: renamable $vgpr31 = COPY $vgpr0, implicit $exec
; GFX90A-NEXT: renamable $sgpr33 = S_LOAD_DWORD_IMM renamable $sgpr6_sgpr7, 24, 0 :: (dereferenceable invariant load (s32) from %ir.arg4.kernarg.offset.align.down, align 8, addrspace 4)
; GFX90A-NEXT: renamable $sgpr20_sgpr21_sgpr22_sgpr23 = S_LOAD_DWORDX4_IMM renamable $sgpr6_sgpr7, 24, 0 :: (dereferenceable invariant load (s128) from %ir.arg6.kernarg.offset.align.down, align 8, addrspace 4)
; GFX90A-NEXT: early-clobber renamable $sgpr20_sgpr21_sgpr22_sgpr23 = S_LOAD_DWORDX4_IMM_ec renamable $sgpr6_sgpr7, 24, 0 :: (dereferenceable invariant load (s128) from %ir.arg6.kernarg.offset.align.down, align 8, addrspace 4)
; GFX90A-NEXT: renamable $sgpr15 = S_LOAD_DWORD_IMM renamable $sgpr6_sgpr7, 40, 0 :: (dereferenceable invariant load (s32) from %ir.arg6.kernarg.offset.align.down + 16, align 8, addrspace 4)
; GFX90A-NEXT: renamable $sgpr16_sgpr17_sgpr18_sgpr19 = S_LOAD_DWORDX4_IMM renamable $sgpr6_sgpr7, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4)
; GFX90A-NEXT: renamable $sgpr54_sgpr55 = S_LOAD_DWORDX2_IMM renamable $sgpr6_sgpr7, 16, 0 :: (dereferenceable invariant load (s64) from %ir.arg.kernarg.offset1 + 16, align 16, addrspace 4)
Expand Down
156 changes: 78 additions & 78 deletions llvm/test/CodeGen/AMDGPU/bswap.ll
Original file line number Diff line number Diff line change
Expand Up @@ -34,29 +34,29 @@ define amdgpu_kernel void @test_bswap_i32(ptr addrspace(1) %out, ptr addrspace(1
;
; VI-LABEL: test_bswap_i32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-NEXT: v_mov_b32_e32 v0, 0x10203
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_load_dword s2, s[2:3], 0x0
; VI-NEXT: s_mov_b32 s4, s0
; VI-NEXT: s_mov_b32 s5, s1
; VI-NEXT: s_load_dword s6, s[6:7], 0x0
; VI-NEXT: s_mov_b32 s0, s4
; VI-NEXT: s_mov_b32 s1, s5
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_perm_b32 v0, 0, s2, v0
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: v_perm_b32 v0, 0, s6, v0
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
;
; GFX11-LABEL: test_bswap_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_load_b32 s2, s[2:3], 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_load_b32 s0, s[6:7], 0x0
; GFX11-NEXT: s_mov_b32 s7, 0x31016000
; GFX11-NEXT: s_mov_b32 s6, -1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_perm_b32 v0, 0, s2, 0x10203
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-NEXT: v_perm_b32 v0, 0, s0, 0x10203
; GFX11-NEXT: buffer_store_b32 v0, off, s[4:7], 0
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
Expand Down Expand Up @@ -87,31 +87,31 @@ define amdgpu_kernel void @test_bswap_v2i32(ptr addrspace(1) %out, ptr addrspace
;
; VI-LABEL: test_bswap_v2i32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-NEXT: v_mov_b32_e32 v0, 0x10203
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
; VI-NEXT: s_mov_b32 s4, s0
; VI-NEXT: s_mov_b32 s5, s1
; VI-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
; VI-NEXT: s_mov_b32 s0, s4
; VI-NEXT: s_mov_b32 s1, s5
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_perm_b32 v1, 0, s3, v0
; VI-NEXT: v_perm_b32 v0, 0, s2, v0
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; VI-NEXT: v_perm_b32 v1, 0, s7, v0
; VI-NEXT: v_perm_b32 v0, 0, s6, v0
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
;
; GFX11-LABEL: test_bswap_v2i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_load_b64 s[4:5], s[2:3], 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_load_b64 s[0:1], s[6:7], 0x0
; GFX11-NEXT: s_mov_b32 s7, 0x31016000
; GFX11-NEXT: s_mov_b32 s6, -1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_perm_b32 v1, 0, s5, 0x10203
; GFX11-NEXT: v_perm_b32 v0, 0, s4, 0x10203
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT: v_perm_b32 v1, 0, s1, 0x10203
; GFX11-NEXT: v_perm_b32 v0, 0, s0, 0x10203
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[4:7], 0
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
Expand Down Expand Up @@ -148,35 +148,35 @@ define amdgpu_kernel void @test_bswap_v4i32(ptr addrspace(1) %out, ptr addrspace
;
; VI-LABEL: test_bswap_v4i32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-NEXT: v_mov_b32_e32 v0, 0x10203
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_load_dwordx4 s[8:11], s[2:3], 0x0
; VI-NEXT: s_mov_b32 s4, s0
; VI-NEXT: s_mov_b32 s5, s1
; VI-NEXT: s_load_dwordx4 s[8:11], s[6:7], 0x0
; VI-NEXT: s_mov_b32 s0, s4
; VI-NEXT: s_mov_b32 s1, s5
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_perm_b32 v3, 0, s11, v0
; VI-NEXT: v_perm_b32 v2, 0, s10, v0
; VI-NEXT: v_perm_b32 v1, 0, s9, v0
; VI-NEXT: v_perm_b32 v0, 0, s8, v0
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; VI-NEXT: s_endpgm
;
; GFX11-LABEL: test_bswap_v4i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_load_b128 s[4:7], s[2:3], 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_load_b128 s[0:3], s[6:7], 0x0
; GFX11-NEXT: s_mov_b32 s7, 0x31016000
; GFX11-NEXT: s_mov_b32 s6, -1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_perm_b32 v3, 0, s7, 0x10203
; GFX11-NEXT: v_perm_b32 v2, 0, s6, 0x10203
; GFX11-NEXT: v_perm_b32 v1, 0, s5, 0x10203
; GFX11-NEXT: v_perm_b32 v0, 0, s4, 0x10203
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: v_perm_b32 v3, 0, s3, 0x10203
; GFX11-NEXT: v_perm_b32 v2, 0, s2, 0x10203
; GFX11-NEXT: v_perm_b32 v1, 0, s1, 0x10203
; GFX11-NEXT: v_perm_b32 v0, 0, s0, 0x10203
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[4:7], 0
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
Expand Down Expand Up @@ -296,31 +296,31 @@ define amdgpu_kernel void @test_bswap_i64(ptr addrspace(1) %out, ptr addrspace(1
;
; VI-LABEL: test_bswap_i64:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-NEXT: v_mov_b32_e32 v0, 0x10203
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
; VI-NEXT: s_mov_b32 s4, s0
; VI-NEXT: s_mov_b32 s5, s1
; VI-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x0
; VI-NEXT: s_mov_b32 s0, s4
; VI-NEXT: s_mov_b32 s1, s5
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_perm_b32 v1, 0, s2, v0
; VI-NEXT: v_perm_b32 v0, 0, s3, v0
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; VI-NEXT: v_perm_b32 v1, 0, s6, v0
; VI-NEXT: v_perm_b32 v0, 0, s7, v0
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
;
; GFX11-LABEL: test_bswap_i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_load_b64 s[4:5], s[2:3], 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_load_b64 s[0:1], s[6:7], 0x0
; GFX11-NEXT: s_mov_b32 s7, 0x31016000
; GFX11-NEXT: s_mov_b32 s6, -1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_perm_b32 v1, 0, s4, 0x10203
; GFX11-NEXT: v_perm_b32 v0, 0, s5, 0x10203
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
; GFX11-NEXT: v_perm_b32 v1, 0, s0, 0x10203
; GFX11-NEXT: v_perm_b32 v0, 0, s1, 0x10203
; GFX11-NEXT: buffer_store_b64 v[0:1], off, s[4:7], 0
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
Expand Down Expand Up @@ -357,35 +357,35 @@ define amdgpu_kernel void @test_bswap_v2i64(ptr addrspace(1) %out, ptr addrspace
;
; VI-LABEL: test_bswap_v2i64:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-NEXT: v_mov_b32_e32 v0, 0x10203
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_load_dwordx4 s[8:11], s[2:3], 0x0
; VI-NEXT: s_mov_b32 s4, s0
; VI-NEXT: s_mov_b32 s5, s1
; VI-NEXT: s_load_dwordx4 s[8:11], s[6:7], 0x0
; VI-NEXT: s_mov_b32 s0, s4
; VI-NEXT: s_mov_b32 s1, s5
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_perm_b32 v3, 0, s10, v0
; VI-NEXT: v_perm_b32 v2, 0, s11, v0
; VI-NEXT: v_perm_b32 v1, 0, s8, v0
; VI-NEXT: v_perm_b32 v0, 0, s9, v0
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; VI-NEXT: s_endpgm
;
; GFX11-LABEL: test_bswap_v2i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_load_b128 s[4:7], s[2:3], 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_load_b128 s[0:3], s[6:7], 0x0
; GFX11-NEXT: s_mov_b32 s7, 0x31016000
; GFX11-NEXT: s_mov_b32 s6, -1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_perm_b32 v3, 0, s6, 0x10203
; GFX11-NEXT: v_perm_b32 v2, 0, s7, 0x10203
; GFX11-NEXT: v_perm_b32 v1, 0, s4, 0x10203
; GFX11-NEXT: v_perm_b32 v0, 0, s5, 0x10203
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-NEXT: v_perm_b32 v3, 0, s2, 0x10203
; GFX11-NEXT: v_perm_b32 v2, 0, s3, 0x10203
; GFX11-NEXT: v_perm_b32 v1, 0, s0, 0x10203
; GFX11-NEXT: v_perm_b32 v0, 0, s1, 0x10203
; GFX11-NEXT: buffer_store_b128 v[0:3], off, s[4:7], 0
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
Expand Down
44 changes: 22 additions & 22 deletions llvm/test/CodeGen/AMDGPU/build_vector.ll
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,12 @@ define amdgpu_kernel void @build_vector2 (ptr addrspace(1) %out) {
;
; GFX8-LABEL: build_vector2:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX8-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX8-NEXT: v_mov_b32_e32 v0, 5
; GFX8-NEXT: v_mov_b32_e32 v1, 6
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v3, s1
; GFX8-NEXT: v_mov_b32_e32 v2, s0
; GFX8-NEXT: v_mov_b32_e32 v2, s2
; GFX8-NEXT: v_mov_b32_e32 v3, s3
; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GFX8-NEXT: s_endpgm
;
Expand Down Expand Up @@ -52,12 +52,12 @@ define amdgpu_kernel void @build_vector2 (ptr addrspace(1) %out) {
;
; GFX940-LABEL: build_vector2:
; GFX940: ; %bb.0: ; %entry
; GFX940-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX940-NEXT: v_mov_b32_e32 v2, 0
; GFX940-NEXT: v_mov_b32_e32 v0, 5
; GFX940-NEXT: v_mov_b32_e32 v1, 6
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1
; GFX940-NEXT: s_endpgm
entry:
store <2 x i32> <i32 5, i32 6>, ptr addrspace(1) %out
Expand All @@ -80,14 +80,14 @@ define amdgpu_kernel void @build_vector4 (ptr addrspace(1) %out) {
;
; GFX8-LABEL: build_vector4:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX8-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX8-NEXT: v_mov_b32_e32 v0, 5
; GFX8-NEXT: v_mov_b32_e32 v1, 6
; GFX8-NEXT: v_mov_b32_e32 v2, 7
; GFX8-NEXT: v_mov_b32_e32 v3, 8
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v5, s1
; GFX8-NEXT: v_mov_b32_e32 v4, s0
; GFX8-NEXT: v_mov_b32_e32 v5, s3
; GFX8-NEXT: v_mov_b32_e32 v4, s2
; GFX8-NEXT: flat_store_dwordx4 v[4:5], v[0:3]
; GFX8-NEXT: s_endpgm
;
Expand Down Expand Up @@ -119,14 +119,14 @@ define amdgpu_kernel void @build_vector4 (ptr addrspace(1) %out) {
;
; GFX940-LABEL: build_vector4:
; GFX940: ; %bb.0: ; %entry
; GFX940-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX940-NEXT: v_mov_b32_e32 v4, 0
; GFX940-NEXT: v_mov_b32_e32 v0, 5
; GFX940-NEXT: v_mov_b32_e32 v1, 6
; GFX940-NEXT: v_mov_b32_e32 v2, 7
; GFX940-NEXT: v_mov_b32_e32 v3, 8
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1
; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] sc0 sc1
; GFX940-NEXT: s_endpgm
entry:
store <4 x i32> <i32 5, i32 6, i32 7, i32 8>, ptr addrspace(1) %out
Expand All @@ -146,11 +146,11 @@ define amdgpu_kernel void @build_vector_v2i16 (ptr addrspace(1) %out) {
;
; GFX8-LABEL: build_vector_v2i16:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX8-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX8-NEXT: v_mov_b32_e32 v2, 0x60005
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_store_dword v[0:1], v2
; GFX8-NEXT: s_endpgm
;
Expand All @@ -176,11 +176,11 @@ define amdgpu_kernel void @build_vector_v2i16 (ptr addrspace(1) %out) {
;
; GFX940-LABEL: build_vector_v2i16:
; GFX940: ; %bb.0: ; %entry
; GFX940-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX940-NEXT: v_mov_b32_e32 v0, 0
; GFX940-NEXT: v_mov_b32_e32 v1, 0x60005
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
; GFX940-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
; GFX940-NEXT: s_endpgm
entry:
store <2 x i16> <i16 5, i16 6>, ptr addrspace(1) %out
Expand All @@ -201,14 +201,14 @@ define amdgpu_kernel void @build_vector_v2i16_trunc (ptr addrspace(1) %out, i32
;
; GFX8-LABEL: build_vector_v2i16_trunc:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dword s2, s[0:1], 0x2c
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX8-NEXT: s_load_dword s4, s[0:1], 0x2c
; GFX8-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_lshr_b32 s2, s2, 16
; GFX8-NEXT: s_or_b32 s2, s2, 0x50000
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: v_mov_b32_e32 v2, s2
; GFX8-NEXT: s_lshr_b32 s0, s4, 16
; GFX8-NEXT: s_or_b32 s0, s0, 0x50000
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: v_mov_b32_e32 v2, s0
; GFX8-NEXT: flat_store_dword v[0:1], v2
; GFX8-NEXT: s_endpgm
;
Expand Down
324 changes: 162 additions & 162 deletions llvm/test/CodeGen/AMDGPU/calling-conventions.ll

Large diffs are not rendered by default.

850 changes: 426 additions & 424 deletions llvm/test/CodeGen/AMDGPU/carryout-selection.ll

Large diffs are not rendered by default.

418 changes: 209 additions & 209 deletions llvm/test/CodeGen/AMDGPU/clamp-modifier.ll

Large diffs are not rendered by default.

1,334 changes: 667 additions & 667 deletions llvm/test/CodeGen/AMDGPU/clamp.ll

Large diffs are not rendered by default.

30 changes: 15 additions & 15 deletions llvm/test/CodeGen/AMDGPU/combine-cond-add-sub.ll
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,14 @@ define amdgpu_kernel void @add1(ptr addrspace(1) nocapture %arg) {
;
; GFX9-LABEL: add1:
; GFX9: ; %bb.0: ; %bb
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 2, v0
; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, v0, v1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_dword v3, v2, s[0:1]
; GFX9-NEXT: global_load_dword v3, v2, s[2:3]
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_addc_co_u32_e32 v0, vcc, 0, v3, vcc
; GFX9-NEXT: global_store_dword v2, v0, s[0:1]
; GFX9-NEXT: global_store_dword v2, v0, s[2:3]
; GFX9-NEXT: s_endpgm
bb:
%x = tail call i32 @llvm.amdgcn.workitem.id.x()
Expand Down Expand Up @@ -103,14 +103,14 @@ define amdgpu_kernel void @sub1(ptr addrspace(1) nocapture %arg) {
;
; GFX9-LABEL: sub1:
; GFX9: ; %bb.0: ; %bb
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 2, v0
; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, v0, v1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_dword v3, v2, s[0:1]
; GFX9-NEXT: global_load_dword v3, v2, s[2:3]
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_subbrev_co_u32_e32 v0, vcc, 0, v3, vcc
; GFX9-NEXT: global_store_dword v2, v0, s[0:1]
; GFX9-NEXT: global_store_dword v2, v0, s[2:3]
; GFX9-NEXT: s_endpgm
bb:
%x = tail call i32 @llvm.amdgcn.workitem.id.x()
Expand Down Expand Up @@ -450,15 +450,15 @@ define amdgpu_kernel void @add_and(ptr addrspace(1) nocapture %arg) {
;
; GFX9-LABEL: add_and:
; GFX9: ; %bb.0: ; %bb
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 2, v0
; GFX9-NEXT: v_max_u32_e32 v1, 1, v1
; GFX9-NEXT: v_cmp_lt_u32_e32 vcc, v1, v0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_dword v3, v2, s[0:1]
; GFX9-NEXT: global_load_dword v3, v2, s[2:3]
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_addc_co_u32_e32 v0, vcc, 0, v3, vcc
; GFX9-NEXT: global_store_dword v2, v0, s[0:1]
; GFX9-NEXT: global_store_dword v2, v0, s[2:3]
; GFX9-NEXT: s_endpgm
bb:
%x = tail call i32 @llvm.amdgcn.workitem.id.x()
Expand Down Expand Up @@ -493,14 +493,14 @@ define amdgpu_kernel void @cmp_sub_sext(ptr addrspace(1) nocapture %arg) {
;
; GFX9-LABEL: cmp_sub_sext:
; GFX9: ; %bb.0: ; %bb
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 2, v0
; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, v0, v1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_dword v3, v2, s[0:1]
; GFX9-NEXT: global_load_dword v3, v2, s[2:3]
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_addc_co_u32_e32 v0, vcc, 0, v3, vcc
; GFX9-NEXT: global_store_dword v2, v0, s[0:1]
; GFX9-NEXT: global_store_dword v2, v0, s[2:3]
; GFX9-NEXT: s_endpgm
bb:
%x = tail call i32 @llvm.amdgcn.workitem.id.x()
Expand Down Expand Up @@ -533,14 +533,14 @@ define amdgpu_kernel void @cmp_sub_zext(ptr addrspace(1) nocapture %arg) {
;
; GFX9-LABEL: cmp_sub_zext:
; GFX9: ; %bb.0: ; %bb
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX9-NEXT: v_lshlrev_b32_e32 v2, 2, v0
; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, v0, v1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_dword v3, v2, s[0:1]
; GFX9-NEXT: global_load_dword v3, v2, s[2:3]
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_subbrev_co_u32_e32 v0, vcc, 0, v3, vcc
; GFX9-NEXT: global_store_dword v2, v0, s[0:1]
; GFX9-NEXT: global_store_dword v2, v0, s[2:3]
; GFX9-NEXT: s_endpgm
bb:
%x = tail call i32 @llvm.amdgcn.workitem.id.x()
Expand Down
22 changes: 11 additions & 11 deletions llvm/test/CodeGen/AMDGPU/combine-vload-extract.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@
define amdgpu_kernel void @vectorLoadCombine(ptr %in, ptr %out) {
; GCN-LABEL: vectorLoadCombine:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, s0
; GCN-NEXT: v_mov_b32_e32 v1, s1
; GCN-NEXT: v_mov_b32_e32 v0, s4
; GCN-NEXT: v_mov_b32_e32 v1, s5
; GCN-NEXT: flat_load_dword v2, v[0:1]
; GCN-NEXT: v_mov_b32_e32 v0, s2
; GCN-NEXT: v_mov_b32_e32 v1, s3
; GCN-NEXT: v_mov_b32_e32 v0, s6
; GCN-NEXT: v_mov_b32_e32 v1, s7
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT: flat_store_dword v[0:1], v2
; GCN-NEXT: s_endpgm
Expand All @@ -37,14 +37,14 @@ entry:
define amdgpu_kernel void @vectorLoadShuffle(ptr %in, ptr %out) {
; GCN-LABEL: vectorLoadShuffle:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GCN-NEXT: s_mov_b32 s0, 0x7050604
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, s0
; GCN-NEXT: v_mov_b32_e32 v1, s1
; GCN-NEXT: v_mov_b32_e32 v0, s4
; GCN-NEXT: v_mov_b32_e32 v1, s5
; GCN-NEXT: flat_load_dword v2, v[0:1]
; GCN-NEXT: s_mov_b32 s0, 0x7050604
; GCN-NEXT: v_mov_b32_e32 v0, s2
; GCN-NEXT: v_mov_b32_e32 v1, s3
; GCN-NEXT: v_mov_b32_e32 v0, s6
; GCN-NEXT: v_mov_b32_e32 v1, s7
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT: v_perm_b32 v2, v2, v2, s0
; GCN-NEXT: flat_store_dword v[0:1], v2
Expand Down
257 changes: 127 additions & 130 deletions llvm/test/CodeGen/AMDGPU/copy-illegal-type.ll

Large diffs are not rendered by default.

22 changes: 11 additions & 11 deletions llvm/test/CodeGen/AMDGPU/copy_to_scc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,21 @@ define amdgpu_kernel void @copy_to_scc(ptr addrspace(1) %out, ptr addrspace(1) %
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
; GCN-NEXT: s_nop 0
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: buffer_load_dword v0, off, s[4:7], 0 offset:252
; GCN-NEXT: s_load_dword s2, s[2:3], 0x0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_cmp_lg_u32 s2, 0
; GCN-NEXT: s_cselect_b64 s[2:3], -1, 0
; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; GCN-NEXT: s_xor_b64 s[2:3], s[2:3], vcc
; GCN-NEXT: s_and_b64 s[2:3], s[2:3], exec
; GCN-NEXT: s_cselect_b32 s2, 2, 3
; GCN-NEXT: v_mov_b32_e32 v0, s2
; GCN-NEXT: global_store_dword v1, v0, s[0:1]
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_load_dword s0, s[6:7], 0x0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_cmp_lg_u32 s0, 0
; GCN-NEXT: s_cselect_b64 s[0:1], -1, 0
; GCN-NEXT: s_xor_b64 s[0:1], s[0:1], vcc
; GCN-NEXT: s_and_b64 s[0:1], s[0:1], exec
; GCN-NEXT: s_cselect_b32 s0, 2, 3
; GCN-NEXT: v_mov_b32_e32 v0, s0
; GCN-NEXT: global_store_dword v1, v0, s[4:5]
; GCN-NEXT: s_endpgm
entry: ; preds = %1009
%0 = load i32, ptr addrspace(1) %in, align 4
Expand Down
54 changes: 27 additions & 27 deletions llvm/test/CodeGen/AMDGPU/ctlz.ll
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,15 @@ define amdgpu_kernel void @s_ctlz_i32(ptr addrspace(1) noalias %out, i32 %val) n
;
; VI-LABEL: s_ctlz_i32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_flbit_i32_b32 s4, s4
; VI-NEXT: s_min_u32 s4, s4, 32
; VI-NEXT: v_mov_b32_e32 v0, s4
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_flbit_i32_b32 s0, s2
; VI-NEXT: s_min_u32 s0, s0, 32
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
;
; EG-LABEL: s_ctlz_i32:
Expand Down Expand Up @@ -88,14 +88,14 @@ define amdgpu_kernel void @s_ctlz_i32(ptr addrspace(1) noalias %out, i32 %val) n
; GFX11-LABEL: s_ctlz_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT: s_load_b32 s4, s[0:1], 0x2c
; GFX11-NEXT: s_load_b64 s[2:3], s[0:1], 0x24
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_clz_i32_u32 s2, s2
; GFX11-NEXT: s_clz_i32_u32 s0, s4
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_min_u32 s2, s2, 32
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT: s_min_u32 s0, s0, 32
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
; GFX11-NEXT: global_store_b32 v0, v1, s[2:3]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
Expand Down Expand Up @@ -612,16 +612,16 @@ define amdgpu_kernel void @s_ctlz_i64(ptr addrspace(1) noalias %out, [8 x i32],
;
; VI-LABEL: s_ctlz_i64:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x4c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x4c
; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: v_mov_b32_e32 v1, 0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_flbit_i32_b64 s4, s[4:5]
; VI-NEXT: s_min_u32 s4, s4, 64
; VI-NEXT: v_mov_b32_e32 v0, s4
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_flbit_i32_b64 s0, s[2:3]
; VI-NEXT: s_min_u32 s0, s0, 64
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; VI-NEXT: s_endpgm
;
; EG-LABEL: s_ctlz_i64:
Expand Down Expand Up @@ -674,13 +674,13 @@ define amdgpu_kernel void @s_ctlz_i64(ptr addrspace(1) noalias %out, [8 x i32],
; GFX11: ; %bb.0:
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: s_load_b64 s[2:3], s[0:1], 0x4c
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT: s_load_b64 s[4:5], s[0:1], 0x24
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_clz_i32_u64 s2, s[2:3]
; GFX11-NEXT: s_clz_i32_u64 s0, s[2:3]
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: s_min_u32 s2, s2, 64
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
; GFX11-NEXT: global_store_b64 v1, v[0:1], s[0:1]
; GFX11-NEXT: s_min_u32 s0, s0, 64
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s0
; GFX11-NEXT: global_store_b64 v1, v[0:1], s[4:5]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
Expand Down
86 changes: 43 additions & 43 deletions llvm/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,13 @@ define amdgpu_kernel void @s_ctlz_zero_undef_i32(ptr addrspace(1) noalias %out,
;
; VI-LABEL: s_ctlz_zero_undef_i32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_flbit_i32_b32 s2, s2
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: v_mov_b32_e32 v2, s2
; VI-NEXT: s_flbit_i32_b32 s0, s4
; VI-NEXT: v_mov_b32_e32 v0, s2
; VI-NEXT: v_mov_b32_e32 v1, s3
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
;
Expand Down Expand Up @@ -331,14 +331,14 @@ define amdgpu_kernel void @s_ctlz_zero_undef_i8_with_select(ptr addrspace(1) noa
;
; VI-LABEL: s_ctlz_zero_undef_i8_with_select:
; VI: ; %bb.0:
; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_lshl_b32 s2, s2, 24
; VI-NEXT: s_flbit_i32_b32 s2, s2
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: v_mov_b32_e32 v2, s2
; VI-NEXT: s_lshl_b32 s0, s4, 24
; VI-NEXT: s_flbit_i32_b32 s0, s0
; VI-NEXT: v_mov_b32_e32 v0, s2
; VI-NEXT: v_mov_b32_e32 v1, s3
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: flat_store_byte v[0:1], v2
; VI-NEXT: s_endpgm
;
Expand Down Expand Up @@ -405,15 +405,15 @@ define amdgpu_kernel void @s_ctlz_zero_undef_i16_with_select(ptr addrspace(1) no
;
; VI-LABEL: s_ctlz_zero_undef_i16_with_select:
; VI: ; %bb.0:
; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_and_b32 s2, s2, 0xffff
; VI-NEXT: s_flbit_i32_b32 s2, s2
; VI-NEXT: s_add_i32 s2, s2, -16
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: v_mov_b32_e32 v2, s2
; VI-NEXT: s_and_b32 s0, s4, 0xffff
; VI-NEXT: s_flbit_i32_b32 s0, s0
; VI-NEXT: s_add_i32 s0, s0, -16
; VI-NEXT: v_mov_b32_e32 v0, s2
; VI-NEXT: v_mov_b32_e32 v1, s3
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: flat_store_short v[0:1], v2
; VI-NEXT: s_endpgm
;
Expand Down Expand Up @@ -479,13 +479,13 @@ define amdgpu_kernel void @s_ctlz_zero_undef_i32_with_select(ptr addrspace(1) no
;
; VI-LABEL: s_ctlz_zero_undef_i32_with_select:
; VI: ; %bb.0:
; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_flbit_i32_b32 s2, s2
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: v_mov_b32_e32 v2, s2
; VI-NEXT: s_flbit_i32_b32 s0, s4
; VI-NEXT: v_mov_b32_e32 v0, s2
; VI-NEXT: v_mov_b32_e32 v1, s3
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
;
Expand Down Expand Up @@ -1198,13 +1198,13 @@ define amdgpu_kernel void @s_ctlz_zero_undef_i64(ptr addrspace(1) noalias %out,
; VI-LABEL: s_ctlz_zero_undef_i64:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x4c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT: v_mov_b32_e32 v1, 0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_flbit_i32_b64 s2, s[2:3]
; VI-NEXT: v_mov_b32_e32 v3, s1
; VI-NEXT: v_mov_b32_e32 v0, s2
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: s_flbit_i32_b64 s0, s[2:3]
; VI-NEXT: v_mov_b32_e32 v2, s4
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: v_mov_b32_e32 v3, s5
; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; VI-NEXT: s_endpgm
;
Expand Down Expand Up @@ -2218,19 +2218,19 @@ define amdgpu_kernel void @s_ctlz_zero_undef_i18(ptr addrspace(1) noalias %out,
;
; VI-LABEL: s_ctlz_zero_undef_i18:
; VI: ; %bb.0:
; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_and_b32 s2, s2, 0x3ffff
; VI-NEXT: s_flbit_i32_b32 s2, s2
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: s_add_i32 s2, s2, -14
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: v_mov_b32_e32 v2, s2
; VI-NEXT: s_add_u32 s0, s0, 2
; VI-NEXT: s_and_b32 s0, s4, 0x3ffff
; VI-NEXT: s_flbit_i32_b32 s0, s0
; VI-NEXT: v_mov_b32_e32 v0, s2
; VI-NEXT: s_add_i32 s4, s0, -14
; VI-NEXT: v_mov_b32_e32 v1, s3
; VI-NEXT: v_mov_b32_e32 v2, s4
; VI-NEXT: s_add_u32 s0, s2, 2
; VI-NEXT: flat_store_short v[0:1], v2
; VI-NEXT: s_addc_u32 s1, s1, 0
; VI-NEXT: s_bfe_u32 s2, s2, 0x20010
; VI-NEXT: s_addc_u32 s1, s3, 0
; VI-NEXT: s_bfe_u32 s2, s4, 0x20010
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: v_mov_b32_e32 v2, s2
Expand Down
52 changes: 26 additions & 26 deletions llvm/test/CodeGen/AMDGPU/ctpop16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -27,15 +27,15 @@ define amdgpu_kernel void @s_ctpop_i16(ptr addrspace(1) noalias %out, i16 %val)
;
; VI-LABEL: s_ctpop_i16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_and_b32 s4, s4, 0xffff
; VI-NEXT: s_bcnt1_i32_b32 s4, s4
; VI-NEXT: v_mov_b32_e32 v0, s4
; VI-NEXT: buffer_store_short v0, off, s[0:3], 0
; VI-NEXT: s_and_b32 s0, s2, 0xffff
; VI-NEXT: s_bcnt1_i32_b32 s0, s0
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: buffer_store_short v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
;
; EG-LABEL: s_ctpop_i16:
Expand Down Expand Up @@ -167,14 +167,14 @@ define amdgpu_kernel void @v_ctpop_add_chain_i16(ptr addrspace(1) noalias %out,
; VI-LABEL: v_ctpop_add_chain_i16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
; VI-NEXT: v_lshlrev_b32_e32 v2, 1, v0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v1, s7
; VI-NEXT: v_add_u32_e32 v0, vcc, s6, v2
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT: v_mov_b32_e32 v3, s1
; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2
; VI-NEXT: v_mov_b32_e32 v3, s3
; VI-NEXT: v_add_u32_e32 v2, vcc, s2, v2
; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
; VI-NEXT: flat_load_ushort v0, v[0:1] glc
; VI-NEXT: s_waitcnt vmcnt(0)
Expand Down Expand Up @@ -1419,15 +1419,15 @@ define amdgpu_kernel void @v_ctpop_i16_add_vvar_inv(ptr addrspace(1) noalias %ou
; VI-LABEL: v_ctpop_i16_add_vvar_inv:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
; VI-NEXT: v_lshlrev_b32_e32 v2, 1, v0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v1, s7
; VI-NEXT: v_add_u32_e32 v0, vcc, s6, v2
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT: flat_load_ushort v3, v[0:1]
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v2
; VI-NEXT: v_mov_b32_e32 v1, s3
; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v2
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT: flat_load_ushort v0, v[0:1]
; VI-NEXT: s_mov_b32 s7, 0xf000
Expand Down Expand Up @@ -1517,29 +1517,29 @@ define amdgpu_kernel void @ctpop_i16_in_br(ptr addrspace(1) %out, ptr addrspace(
;
; VI-LABEL: ctpop_i16_in_br:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_load_dword s4, s[0:1], 0x34
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT: s_load_dword s2, s[0:1], 0x34
; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_lshr_b32 s5, s4, 16
; VI-NEXT: s_cmp_lg_u32 s5, 0
; VI-NEXT: s_lshr_b32 s0, s2, 16
; VI-NEXT: s_cmp_lg_u32 s0, 0
; VI-NEXT: s_cbranch_scc0 .LBB14_4
; VI-NEXT: ; %bb.1: ; %else
; VI-NEXT: s_mov_b32 s11, 0xf000
; VI-NEXT: s_mov_b32 s10, -1
; VI-NEXT: s_mov_b32 s8, s2
; VI-NEXT: s_mov_b32 s9, s3
; VI-NEXT: s_mov_b32 s8, s6
; VI-NEXT: s_mov_b32 s9, s7
; VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 offset:2
; VI-NEXT: s_cbranch_execnz .LBB14_3
; VI-NEXT: .LBB14_2: ; %if
; VI-NEXT: s_and_b32 s2, s4, 0xffff
; VI-NEXT: s_bcnt1_i32_b32 s2, s2
; VI-NEXT: s_and_b32 s0, s2, 0xffff
; VI-NEXT: s_bcnt1_i32_b32 s0, s0
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: v_mov_b32_e32 v0, s2
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: .LBB14_3: ; %endif
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_store_short v0, off, s[0:3], 0
; VI-NEXT: buffer_store_short v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
; VI-NEXT: .LBB14_4:
; VI-NEXT: ; implicit-def: $vgpr0
Expand Down
76 changes: 38 additions & 38 deletions llvm/test/CodeGen/AMDGPU/ctpop64.ll
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,14 @@ define amdgpu_kernel void @s_ctpop_i64(ptr addrspace(1) noalias %out, [8 x i32],
;
; VI-LABEL: s_ctpop_i64:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x4c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x4c
; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
; VI-NEXT: v_mov_b32_e32 v0, s4
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_bcnt1_i32_b64 s0, s[2:3]
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
%ctpop = call i64 @llvm.ctpop.i64(i64 %val) nounwind readnone
%truncctpop = trunc i64 %ctpop to i32
Expand Down Expand Up @@ -116,7 +116,7 @@ define amdgpu_kernel void @v_ctpop_i64_user(ptr addrspace(1) noalias %out, ptr a
; VI-LABEL: v_ctpop_i64_user:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
; VI-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v1, s7
Expand All @@ -128,8 +128,8 @@ define amdgpu_kernel void @v_ctpop_i64_user(ptr addrspace(1) noalias %out, ptr a
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: v_bcnt_u32_b32 v0, v0, 0
; VI-NEXT: v_bcnt_u32_b32 v0, v1, v0
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: v_or_b32_e32 v0, s0, v0
; VI-NEXT: v_mov_b32_e32 v1, s3
; VI-NEXT: v_or_b32_e32 v0, s2, v0
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; VI-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
Expand Down Expand Up @@ -159,15 +159,15 @@ define amdgpu_kernel void @s_ctpop_v2i64(ptr addrspace(1) noalias %out, <2 x i64
; VI-LABEL: s_ctpop_v2i64:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24
; VI-NEXT: s_mov_b32 s11, 0xf000
; VI-NEXT: s_mov_b32 s10, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
; VI-NEXT: s_bcnt1_i32_b64 s5, s[6:7]
; VI-NEXT: v_mov_b32_e32 v0, s4
; VI-NEXT: v_mov_b32_e32 v1, s5
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_bcnt1_i32_b64 s0, s[4:5]
; VI-NEXT: s_bcnt1_i32_b64 s1, s[6:7]
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0
; VI-NEXT: s_endpgm
%ctpop = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %val) nounwind readnone
%truncctpop = trunc <2 x i64> %ctpop to <2 x i32>
Expand Down Expand Up @@ -197,19 +197,19 @@ define amdgpu_kernel void @s_ctpop_v4i64(ptr addrspace(1) noalias %out, <4 x i64
; VI-LABEL: s_ctpop_v4i64:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx8 s[4:11], s[0:1], 0x44
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x24
; VI-NEXT: s_mov_b32 s15, 0xf000
; VI-NEXT: s_mov_b32 s14, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
; VI-NEXT: s_bcnt1_i32_b64 s5, s[6:7]
; VI-NEXT: s_bcnt1_i32_b64 s6, s[8:9]
; VI-NEXT: s_bcnt1_i32_b64 s7, s[10:11]
; VI-NEXT: v_mov_b32_e32 v0, s4
; VI-NEXT: v_mov_b32_e32 v1, s5
; VI-NEXT: v_mov_b32_e32 v2, s6
; VI-NEXT: v_mov_b32_e32 v3, s7
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; VI-NEXT: s_bcnt1_i32_b64 s0, s[4:5]
; VI-NEXT: s_bcnt1_i32_b64 s1, s[6:7]
; VI-NEXT: s_bcnt1_i32_b64 s2, s[8:9]
; VI-NEXT: s_bcnt1_i32_b64 s3, s[10:11]
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: v_mov_b32_e32 v2, s2
; VI-NEXT: v_mov_b32_e32 v3, s3
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[12:15], 0
; VI-NEXT: s_endpgm
%ctpop = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> %val) nounwind readnone
%truncctpop = trunc <4 x i64> %ctpop to <4 x i32>
Expand Down Expand Up @@ -424,15 +424,15 @@ define amdgpu_kernel void @s_ctpop_i128(ptr addrspace(1) noalias %out, i128 %val
; VI-LABEL: s_ctpop_i128:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x24
; VI-NEXT: s_mov_b32 s11, 0xf000
; VI-NEXT: s_mov_b32 s10, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_bcnt1_i32_b64 s6, s[6:7]
; VI-NEXT: s_bcnt1_i32_b64 s4, s[4:5]
; VI-NEXT: s_add_i32 s4, s4, s6
; VI-NEXT: v_mov_b32_e32 v0, s4
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_bcnt1_i32_b64 s0, s[6:7]
; VI-NEXT: s_bcnt1_i32_b64 s1, s[4:5]
; VI-NEXT: s_add_i32 s0, s1, s0
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: buffer_store_dword v0, off, s[8:11], 0
; VI-NEXT: s_endpgm
%ctpop = call i128 @llvm.ctpop.i128(i128 %val) nounwind readnone
%truncctpop = trunc i128 %ctpop to i32
Expand Down
32 changes: 16 additions & 16 deletions llvm/test/CodeGen/AMDGPU/cttz.ll
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,15 @@ define amdgpu_kernel void @s_cttz_i32(ptr addrspace(1) noalias %out, i32 %val) n
;
; VI-LABEL: s_cttz_i32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_ff1_i32_b32 s4, s4
; VI-NEXT: s_min_u32 s4, s4, 32
; VI-NEXT: v_mov_b32_e32 v0, s4
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_ff1_i32_b32 s0, s2
; VI-NEXT: s_min_u32 s0, s0, 32
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT: s_endpgm
;
; EG-LABEL: s_cttz_i32:
Expand Down Expand Up @@ -519,16 +519,16 @@ define amdgpu_kernel void @s_cttz_i64(ptr addrspace(1) noalias %out, [8 x i32],
;
; VI-LABEL: s_cttz_i64:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x4c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x4c
; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: v_mov_b32_e32 v1, 0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_ff1_i32_b64 s4, s[4:5]
; VI-NEXT: s_min_u32 s4, s4, 64
; VI-NEXT: v_mov_b32_e32 v0, s4
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_ff1_i32_b64 s0, s[2:3]
; VI-NEXT: s_min_u32 s0, s0, 64
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; VI-NEXT: s_endpgm
;
; EG-LABEL: s_cttz_i64:
Expand Down
48 changes: 24 additions & 24 deletions llvm/test/CodeGen/AMDGPU/cttz_zero_undef.ll
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,13 @@ define amdgpu_kernel void @s_cttz_zero_undef_i32(ptr addrspace(1) noalias %out,
;
; VI-LABEL: s_cttz_zero_undef_i32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_ff1_i32_b32 s2, s2
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: v_mov_b32_e32 v2, s2
; VI-NEXT: s_ff1_i32_b32 s0, s4
; VI-NEXT: v_mov_b32_e32 v0, s2
; VI-NEXT: v_mov_b32_e32 v1, s3
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
;
Expand Down Expand Up @@ -317,13 +317,13 @@ define amdgpu_kernel void @s_cttz_zero_undef_i8_with_select(ptr addrspace(1) noa
;
; VI-LABEL: s_cttz_zero_undef_i8_with_select:
; VI: ; %bb.0:
; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_ff1_i32_b32 s2, s2
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: v_mov_b32_e32 v2, s2
; VI-NEXT: s_ff1_i32_b32 s0, s4
; VI-NEXT: v_mov_b32_e32 v0, s2
; VI-NEXT: v_mov_b32_e32 v1, s3
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: flat_store_byte v[0:1], v2
; VI-NEXT: s_endpgm
;
Expand Down Expand Up @@ -386,13 +386,13 @@ define amdgpu_kernel void @s_cttz_zero_undef_i16_with_select(ptr addrspace(1) no
;
; VI-LABEL: s_cttz_zero_undef_i16_with_select:
; VI: ; %bb.0:
; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_ff1_i32_b32 s2, s2
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: v_mov_b32_e32 v2, s2
; VI-NEXT: s_ff1_i32_b32 s0, s4
; VI-NEXT: v_mov_b32_e32 v0, s2
; VI-NEXT: v_mov_b32_e32 v1, s3
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: flat_store_short v[0:1], v2
; VI-NEXT: s_endpgm
;
Expand Down Expand Up @@ -455,13 +455,13 @@ define amdgpu_kernel void @s_cttz_zero_undef_i32_with_select(ptr addrspace(1) no
;
; VI-LABEL: s_cttz_zero_undef_i32_with_select:
; VI: ; %bb.0:
; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_ff1_i32_b32 s2, s2
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: v_mov_b32_e32 v2, s2
; VI-NEXT: s_ff1_i32_b32 s0, s4
; VI-NEXT: v_mov_b32_e32 v0, s2
; VI-NEXT: v_mov_b32_e32 v1, s3
; VI-NEXT: v_mov_b32_e32 v2, s0
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
;
Expand Down
Loading