118 changes: 68 additions & 50 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/mul-known-bits.i64.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ declare i32 @llvm.amdgcn.workitem.id.x()
define amdgpu_kernel void @v_mul_i64_no_zext(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) nounwind {
; GFX10-LABEL: v_mul_i64_no_zext:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x2c
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x2c
; GFX10-NEXT: v_lshlrev_b32_e32 v7, 3, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_clause 0x1
Expand All @@ -23,7 +23,9 @@ define amdgpu_kernel void @v_mul_i64_no_zext(ptr addrspace(1) %out, ptr addrspac
;
; GFX11-LABEL: v_mul_i64_no_zext:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x2c
; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x2c
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_lshlrev_b32_e32 v9, 3, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_clause 0x1
Expand Down Expand Up @@ -56,13 +58,13 @@ define amdgpu_kernel void @v_mul_i64_zext_src1(ptr addrspace(1) %out, ptr addrsp
; GFX10-LABEL: v_mul_i64_zext_src1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 3, v0
; GFX10-NEXT: v_lshlrev_b32_e32 v3, 2, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_load_dwordx2 v[0:1], v2, s[6:7]
; GFX10-NEXT: global_load_dword v4, v3, s[2:3]
; GFX10-NEXT: global_load_dword v4, v3, s[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mad_u64_u32 v[2:3], s0, v0, v4, 0
; GFX10-NEXT: v_mov_b32_e32 v0, v3
Expand All @@ -75,8 +77,10 @@ define amdgpu_kernel void @v_mul_i64_zext_src1(ptr addrspace(1) %out, ptr addrsp
; GFX11-LABEL: v_mul_i64_zext_src1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x34
; GFX11-NEXT: s_load_b128 s[4:7], s[2:3], 0x24
; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x34
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 2, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
Expand Down Expand Up @@ -108,13 +112,13 @@ define amdgpu_kernel void @v_mul_i64_zext_src0(ptr addrspace(1) %out, ptr addrsp
; GFX10-LABEL: v_mul_i64_zext_src0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 2, v0
; GFX10-NEXT: v_lshlrev_b32_e32 v3, 3, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_load_dword v4, v2, s[6:7]
; GFX10-NEXT: global_load_dwordx2 v[0:1], v3, s[2:3]
; GFX10-NEXT: global_load_dwordx2 v[0:1], v3, s[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mad_u64_u32 v[2:3], s0, v4, v0, 0
; GFX10-NEXT: v_mov_b32_e32 v0, v3
Expand All @@ -127,8 +131,10 @@ define amdgpu_kernel void @v_mul_i64_zext_src0(ptr addrspace(1) %out, ptr addrsp
; GFX11-LABEL: v_mul_i64_zext_src0:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x34
; GFX11-NEXT: s_load_b128 s[4:7], s[2:3], 0x24
; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x34
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
Expand Down Expand Up @@ -160,13 +166,13 @@ define amdgpu_kernel void @v_mul_i64_zext_src0_src1(ptr addrspace(1) %out, ptr a
; GFX10-LABEL: v_mul_i64_zext_src0_src1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: global_load_dword v1, v0, s[6:7]
; GFX10-NEXT: global_load_dword v2, v0, s[2:3]
; GFX10-NEXT: global_load_dword v2, v0, s[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, v1, v2, 0
; GFX10-NEXT: v_mov_b32_e32 v2, 0
Expand All @@ -176,10 +182,12 @@ define amdgpu_kernel void @v_mul_i64_zext_src0_src1(ptr addrspace(1) %out, ptr a
; GFX11-LABEL: v_mul_i64_zext_src0_src1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x34
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX11-NEXT: s_load_b128 s[4:7], s[2:3], 0x24
; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x34
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_load_b32 v1, v0, s[6:7]
Expand Down Expand Up @@ -207,13 +215,13 @@ define amdgpu_kernel void @v_mul_i64_masked_src0_hi(ptr addrspace(1) %out, ptr a
; GFX10-LABEL: v_mul_i64_masked_src0_hi:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 3, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: global_load_dword v4, v2, s[6:7]
; GFX10-NEXT: global_load_dwordx2 v[0:1], v2, s[2:3]
; GFX10-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mad_u64_u32 v[2:3], s0, v4, v0, 0
; GFX10-NEXT: v_mov_b32_e32 v0, v3
Expand All @@ -226,8 +234,10 @@ define amdgpu_kernel void @v_mul_i64_masked_src0_hi(ptr addrspace(1) %out, ptr a
; GFX11-LABEL: v_mul_i64_masked_src0_hi:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x34
; GFX11-NEXT: s_load_b128 s[4:7], s[2:3], 0x24
; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x34
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_clause 0x1
Expand Down Expand Up @@ -259,13 +269,13 @@ define amdgpu_kernel void @v_mul_i64_masked_src0_lo(ptr addrspace(1) %out, ptr a
; GFX10-LABEL: v_mul_i64_masked_src0_lo:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 3, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: global_load_dwordx2 v[0:1], v4, s[6:7]
; GFX10-NEXT: global_load_dwordx2 v[2:3], v4, s[2:3]
; GFX10-NEXT: global_load_dwordx2 v[2:3], v4, s[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(1)
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_waitcnt vmcnt(0)
Expand All @@ -276,8 +286,10 @@ define amdgpu_kernel void @v_mul_i64_masked_src0_lo(ptr addrspace(1) %out, ptr a
; GFX11-LABEL: v_mul_i64_masked_src0_lo:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x34
; GFX11-NEXT: s_load_b128 s[4:7], s[2:3], 0x24
; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x34
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 3, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_clause 0x1
Expand Down Expand Up @@ -307,16 +319,16 @@ define amdgpu_kernel void @v_mul_i64_masked_src1_lo(ptr addrspace(1) %out, ptr a
; GFX10-LABEL: v_mul_i64_masked_src1_lo:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
; GFX10-NEXT: v_lshlrev_b32_e32 v3, 3, v0
; GFX10-NEXT: ; kill: killed $vgpr3
; GFX10-NEXT: ; kill: killed $sgpr6_sgpr7
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: global_load_dwordx2 v[0:1], v3, s[6:7]
; GFX10-NEXT: global_load_dwordx2 v[1:2], v3, s[2:3]
; GFX10-NEXT: ; kill: killed $sgpr2_sgpr3
; GFX10-NEXT: global_load_dwordx2 v[1:2], v3, s[0:1]
; GFX10-NEXT: ; kill: killed $sgpr0_sgpr1
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mul_lo_u32 v1, v0, v2
; GFX10-NEXT: v_mov_b32_e32 v0, 0
Expand All @@ -326,8 +338,10 @@ define amdgpu_kernel void @v_mul_i64_masked_src1_lo(ptr addrspace(1) %out, ptr a
; GFX11-LABEL: v_mul_i64_masked_src1_lo:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x34
; GFX11-NEXT: s_load_b128 s[4:7], s[2:3], 0x24
; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x34
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 3, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_clause 0x1
Expand Down Expand Up @@ -355,17 +369,17 @@ define amdgpu_kernel void @v_mul_i64_masked_src1_lo(ptr addrspace(1) %out, ptr a
define amdgpu_kernel void @v_mul_i64_masked_src0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) {
; GFX10-LABEL: v_mul_i64_masked_src0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: v_mov_b32_e32 v2, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: v_mul_i64_masked_src0:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
Expand All @@ -389,13 +403,13 @@ define amdgpu_kernel void @v_mul_i64_partially_masked_src0(ptr addrspace(1) %out
; GFX10-LABEL: v_mul_i64_partially_masked_src0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 3, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: global_load_dwordx2 v[0:1], v4, s[6:7]
; GFX10-NEXT: global_load_dwordx2 v[2:3], v4, s[2:3]
; GFX10-NEXT: global_load_dwordx2 v[2:3], v4, s[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(1)
; GFX10-NEXT: v_and_b32_e32 v6, 0xfff00000, v0
; GFX10-NEXT: s_waitcnt vmcnt(0)
Expand All @@ -412,8 +426,10 @@ define amdgpu_kernel void @v_mul_i64_partially_masked_src0(ptr addrspace(1) %out
; GFX11-LABEL: v_mul_i64_partially_masked_src0:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x34
; GFX11-NEXT: s_load_b128 s[4:7], s[2:3], 0x24
; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x34
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 3, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_clause 0x1
Expand Down Expand Up @@ -450,17 +466,17 @@ define amdgpu_kernel void @v_mul_i64_partially_masked_src0(ptr addrspace(1) %out
define amdgpu_kernel void @v_mul64_masked_before_branch(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) {
; GFX10-LABEL: v_mul64_masked_before_branch:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: v_mov_b32_e32 v2, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3]
; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: v_mul64_masked_before_branch:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
Expand Down Expand Up @@ -498,13 +514,13 @@ define amdgpu_kernel void @v_mul64_masked_before_and_in_branch(ptr addrspace(1)
; GFX10-LABEL: v_mul64_masked_before_and_in_branch:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: global_load_dwordx2 v[2:3], v0, s[6:7]
; GFX10-NEXT: global_load_dwordx2 v[4:5], v0, s[2:3]
; GFX10-NEXT: global_load_dwordx2 v[4:5], v0, s[0:1]
; GFX10-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX10-NEXT: s_waitcnt vmcnt(1)
; GFX10-NEXT: v_cmp_ge_u64_e32 vcc_lo, 0, v[2:3]
Expand Down Expand Up @@ -533,8 +549,10 @@ define amdgpu_kernel void @v_mul64_masked_before_and_in_branch(ptr addrspace(1)
; GFX11-LABEL: v_mul64_masked_before_and_in_branch:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x34
; GFX11-NEXT: s_load_b128 s[4:7], s[2:3], 0x24
; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x34
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_clause 0x1
Expand Down
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2542,7 +2542,7 @@ define amdgpu_ps void @s_mul_u64_zext_with_vregs(ptr addrspace(1) %out, ptr addr
define amdgpu_kernel void @s_mul_u64_zext_with_sregs(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GFX7-LABEL: s_mul_u64_zext_with_sregs:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
; GFX7-NEXT: v_mov_b32_e32 v0, 0x50
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_load_dword s3, s[2:3], 0x0
Expand All @@ -2559,7 +2559,7 @@ define amdgpu_kernel void @s_mul_u64_zext_with_sregs(ptr addrspace(1) %out, ptr
;
; GFX8-LABEL: s_mul_u64_zext_with_sregs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX8-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX8-NEXT: v_mov_b32_e32 v0, 0x50
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_load_dword s0, s[6:7], 0x0
Expand All @@ -2576,7 +2576,7 @@ define amdgpu_kernel void @s_mul_u64_zext_with_sregs(ptr addrspace(1) %out, ptr
;
; GFX9-LABEL: s_mul_u64_zext_with_sregs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_load_dword s1, s[6:7], 0x0
Expand All @@ -2590,7 +2590,7 @@ define amdgpu_kernel void @s_mul_u64_zext_with_sregs(ptr addrspace(1) %out, ptr
;
; GFX10-LABEL: s_mul_u64_zext_with_sregs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX10-NEXT: v_mov_b32_e32 v2, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_load_dword s1, s[6:7], 0x0
Expand All @@ -2604,7 +2604,7 @@ define amdgpu_kernel void @s_mul_u64_zext_with_sregs(ptr addrspace(1) %out, ptr
;
; GFX11-LABEL: s_mul_u64_zext_with_sregs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_load_b32 s3, s[2:3], 0x0
Expand All @@ -2619,7 +2619,7 @@ define amdgpu_kernel void @s_mul_u64_zext_with_sregs(ptr addrspace(1) %out, ptr
;
; GFX12-LABEL: s_mul_u64_zext_with_sregs:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
; GFX12-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
; GFX12-NEXT: v_mov_b32_e32 v2, 0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_b32 s2, s[2:3], 0x0
Expand Down Expand Up @@ -2718,7 +2718,7 @@ define amdgpu_ps void @s_mul_u64_sext_with_vregs(ptr addrspace(1) %out, ptr addr
define amdgpu_kernel void @s_mul_u64_sext_with_sregs(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GFX7-LABEL: s_mul_u64_sext_with_sregs:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
; GFX7-NEXT: v_mov_b32_e32 v0, 0x50
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_load_dword s3, s[2:3], 0x0
Expand All @@ -2738,7 +2738,7 @@ define amdgpu_kernel void @s_mul_u64_sext_with_sregs(ptr addrspace(1) %out, ptr
;
; GFX8-LABEL: s_mul_u64_sext_with_sregs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX8-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX8-NEXT: v_mov_b32_e32 v0, 0x50
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_load_dword s0, s[6:7], 0x0
Expand All @@ -2758,7 +2758,7 @@ define amdgpu_kernel void @s_mul_u64_sext_with_sregs(ptr addrspace(1) %out, ptr
;
; GFX9-LABEL: s_mul_u64_sext_with_sregs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_load_dword s1, s[6:7], 0x0
Expand All @@ -2775,7 +2775,7 @@ define amdgpu_kernel void @s_mul_u64_sext_with_sregs(ptr addrspace(1) %out, ptr
;
; GFX10-LABEL: s_mul_u64_sext_with_sregs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX10-NEXT: v_mov_b32_e32 v2, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_load_dword s0, s[6:7], 0x0
Expand All @@ -2792,7 +2792,7 @@ define amdgpu_kernel void @s_mul_u64_sext_with_sregs(ptr addrspace(1) %out, ptr
;
; GFX11-LABEL: s_mul_u64_sext_with_sregs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_load_b32 s2, s[2:3], 0x0
Expand All @@ -2810,7 +2810,7 @@ define amdgpu_kernel void @s_mul_u64_sext_with_sregs(ptr addrspace(1) %out, ptr
;
; GFX12-LABEL: s_mul_u64_sext_with_sregs:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
; GFX12-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
; GFX12-NEXT: v_mov_b32_e32 v2, 0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_b32 s2, s[2:3], 0x0
Expand Down
738 changes: 369 additions & 369 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/smrd.ll
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,8 @@ entry:
; GCN-LABEL: {{^}}smrd6:
; SICIVI: s_add_u32 s{{[0-9]}}, s{{[0-9]}}, -4
; SICIVI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x0
; GFX9_10: s_add_u32 s2, s2, -4
; GFX9_10: s_addc_u32 s3, s3, -1
; GFX9_10: s_add_u32 s0, s6, -4
; GFX9_10: s_addc_u32 s1, s7, -1
; GFX9_10: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x0
define amdgpu_kernel void @smrd6(ptr addrspace(1) %out, ptr addrspace(4) %ptr) #0 {
entry:
Expand Down
486 changes: 243 additions & 243 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll

Large diffs are not rendered by default.

30 changes: 15 additions & 15 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/vni8-across-blocks.ll
Original file line number Diff line number Diff line change
Expand Up @@ -479,33 +479,33 @@ return:
define amdgpu_kernel void @v8i8_phi_chain(ptr addrspace(1) %src1, ptr addrspace(1) %src2, ptr addrspace(1) nocapture %dst0, ptr addrspace(1) nocapture %dst1) {
; GFX906-LABEL: v8i8_phi_chain:
; GFX906: ; %bb.0: ; %entry
; GFX906-NEXT: s_load_dwordx8 s[0:7], s[2:3], 0x24
; GFX906-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x24
; GFX906-NEXT: v_lshlrev_b32_e32 v3, 3, v0
; GFX906-NEXT: v_cmp_gt_u32_e32 vcc, 15, v0
; GFX906-NEXT: s_waitcnt lgkmcnt(0)
; GFX906-NEXT: global_load_dwordx2 v[1:2], v3, s[0:1]
; GFX906-NEXT: s_xor_b64 s[0:1], vcc, -1
; GFX906-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GFX906-NEXT: s_waitcnt lgkmcnt(0)
; GFX906-NEXT: global_load_dwordx2 v[1:2], v3, s[4:5]
; GFX906-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX906-NEXT: s_cbranch_execz .LBB8_2
; GFX906-NEXT: ; %bb.1: ; %bb.1
; GFX906-NEXT: global_load_dwordx2 v[1:2], v3, s[2:3]
; GFX906-NEXT: global_load_dwordx2 v[1:2], v3, s[6:7]
; GFX906-NEXT: v_cmp_gt_u32_e32 vcc, 7, v0
; GFX906-NEXT: s_andn2_b64 s[0:1], s[0:1], exec
; GFX906-NEXT: s_and_b64 s[2:3], exec, vcc
; GFX906-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3]
; GFX906-NEXT: s_and_b64 s[4:5], exec, vcc
; GFX906-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5]
; GFX906-NEXT: .LBB8_2: ; %Flow
; GFX906-NEXT: s_or_b64 exec, exec, s[8:9]
; GFX906-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX906-NEXT: s_and_saveexec_b64 s[2:3], s[0:1]
; GFX906-NEXT: s_cbranch_execz .LBB8_4
; GFX906-NEXT: ; %bb.3: ; %bb.2
; GFX906-NEXT: v_mov_b32_e32 v0, 0
; GFX906-NEXT: s_waitcnt vmcnt(0)
; GFX906-NEXT: global_store_dwordx2 v0, v[1:2], s[4:5]
; GFX906-NEXT: global_store_dwordx2 v0, v[1:2], s[8:9]
; GFX906-NEXT: .LBB8_4: ; %bb.3
; GFX906-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX906-NEXT: v_mov_b32_e32 v0, 0
; GFX906-NEXT: s_waitcnt vmcnt(0)
; GFX906-NEXT: global_store_dwordx2 v0, v[1:2], s[6:7]
; GFX906-NEXT: global_store_dwordx2 v0, v[1:2], s[10:11]
; GFX906-NEXT: s_endpgm
entry:
%idx = call i32 @llvm.amdgcn.workitem.id.x()
Expand Down Expand Up @@ -533,31 +533,31 @@ bb.3:
define amdgpu_kernel void @v8i8_multi_block(ptr addrspace(1) %src1, ptr addrspace(1) %src2, ptr addrspace(1) nocapture %dst0, ptr addrspace(1) nocapture %dst1) {
; GFX906-LABEL: v8i8_multi_block:
; GFX906: ; %bb.0: ; %entry
; GFX906-NEXT: s_load_dwordx8 s[0:7], s[2:3], 0x24
; GFX906-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x24
; GFX906-NEXT: v_lshlrev_b32_e32 v5, 3, v0
; GFX906-NEXT: v_cmp_gt_u32_e32 vcc, 15, v0
; GFX906-NEXT: s_waitcnt lgkmcnt(0)
; GFX906-NEXT: global_load_dwordx2 v[3:4], v5, s[0:1]
; GFX906-NEXT: global_load_dwordx2 v[3:4], v5, s[4:5]
; GFX906-NEXT: s_waitcnt vmcnt(0)
; GFX906-NEXT: v_mov_b32_e32 v1, v3
; GFX906-NEXT: v_mov_b32_e32 v2, v4
; GFX906-NEXT: s_and_saveexec_b64 s[0:1], vcc
; GFX906-NEXT: s_cbranch_execz .LBB9_4
; GFX906-NEXT: ; %bb.1: ; %bb.1
; GFX906-NEXT: global_load_dwordx2 v[1:2], v5, s[2:3]
; GFX906-NEXT: global_load_dwordx2 v[1:2], v5, s[6:7]
; GFX906-NEXT: v_cmp_gt_u32_e32 vcc, 7, v0
; GFX906-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX906-NEXT: s_cbranch_execz .LBB9_3
; GFX906-NEXT: ; %bb.2: ; %bb.2
; GFX906-NEXT: v_mov_b32_e32 v0, 0
; GFX906-NEXT: global_store_dwordx2 v0, v[3:4], s[4:5]
; GFX906-NEXT: global_store_dwordx2 v0, v[3:4], s[8:9]
; GFX906-NEXT: .LBB9_3: ; %Flow
; GFX906-NEXT: s_or_b64 exec, exec, s[2:3]
; GFX906-NEXT: .LBB9_4: ; %bb.3
; GFX906-NEXT: s_or_b64 exec, exec, s[0:1]
; GFX906-NEXT: v_mov_b32_e32 v0, 0
; GFX906-NEXT: s_waitcnt vmcnt(0)
; GFX906-NEXT: global_store_dwordx2 v0, v[1:2], s[6:7]
; GFX906-NEXT: global_store_dwordx2 v0, v[1:2], s[10:11]
; GFX906-NEXT: s_endpgm
entry:
%idx = call i32 @llvm.amdgcn.workitem.id.x()
Expand Down
60 changes: 30 additions & 30 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/widen-i8-i16-scalar-loads.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
define amdgpu_kernel void @constant_load_i8_align4(ptr addrspace (1) %out, ptr addrspace(4) %in) #0 {
; GFX8-LABEL: constant_load_i8_align4:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_load_dword s2, s[2:3], 0x0
; GFX8-NEXT: v_mov_b32_e32 v0, s0
Expand All @@ -18,7 +18,7 @@ define amdgpu_kernel void @constant_load_i8_align4(ptr addrspace (1) %out, ptr a
;
; GFX9-LABEL: constant_load_i8_align4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_load_dword s0, s[6:7], 0x0
Expand All @@ -29,7 +29,7 @@ define amdgpu_kernel void @constant_load_i8_align4(ptr addrspace (1) %out, ptr a
;
; GFX10-LABEL: constant_load_i8_align4:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_load_dword s0, s[6:7], 0x0
Expand All @@ -45,7 +45,7 @@ define amdgpu_kernel void @constant_load_i8_align4(ptr addrspace (1) %out, ptr a
define amdgpu_kernel void @constant_load_i16_align4(ptr addrspace (1) %out, ptr addrspace(4) %in) #0 {
; GFX8-LABEL: constant_load_i16_align4:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_load_dword s2, s[2:3], 0x0
; GFX8-NEXT: v_mov_b32_e32 v0, s0
Expand All @@ -57,7 +57,7 @@ define amdgpu_kernel void @constant_load_i16_align4(ptr addrspace (1) %out, ptr
;
; GFX9-LABEL: constant_load_i16_align4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_load_dword s0, s[6:7], 0x0
Expand All @@ -68,7 +68,7 @@ define amdgpu_kernel void @constant_load_i16_align4(ptr addrspace (1) %out, ptr
;
; GFX10-LABEL: constant_load_i16_align4:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_load_dword s0, s[6:7], 0x0
Expand All @@ -84,7 +84,7 @@ define amdgpu_kernel void @constant_load_i16_align4(ptr addrspace (1) %out, ptr
define amdgpu_kernel void @sextload_i8_to_i32_align4(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX8-LABEL: sextload_i8_to_i32_align4:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_load_dword s2, s[2:3], 0x0
; GFX8-NEXT: v_mov_b32_e32 v0, s0
Expand All @@ -97,7 +97,7 @@ define amdgpu_kernel void @sextload_i8_to_i32_align4(ptr addrspace(1) %out, ptr
;
; GFX9-LABEL: sextload_i8_to_i32_align4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_load_dword s0, s[6:7], 0x0
Expand All @@ -109,7 +109,7 @@ define amdgpu_kernel void @sextload_i8_to_i32_align4(ptr addrspace(1) %out, ptr
;
; GFX10-LABEL: sextload_i8_to_i32_align4:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_load_dword s0, s[6:7], 0x0
Expand All @@ -127,7 +127,7 @@ define amdgpu_kernel void @sextload_i8_to_i32_align4(ptr addrspace(1) %out, ptr
define amdgpu_kernel void @sextload_i16_to_i32_align4(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX8-LABEL: sextload_i16_to_i32_align4:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_load_dword s2, s[2:3], 0x0
; GFX8-NEXT: v_mov_b32_e32 v0, s0
Expand All @@ -140,7 +140,7 @@ define amdgpu_kernel void @sextload_i16_to_i32_align4(ptr addrspace(1) %out, ptr
;
; GFX9-LABEL: sextload_i16_to_i32_align4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_load_dword s0, s[6:7], 0x0
Expand All @@ -152,7 +152,7 @@ define amdgpu_kernel void @sextload_i16_to_i32_align4(ptr addrspace(1) %out, ptr
;
; GFX10-LABEL: sextload_i16_to_i32_align4:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_load_dword s0, s[6:7], 0x0
Expand All @@ -170,7 +170,7 @@ define amdgpu_kernel void @sextload_i16_to_i32_align4(ptr addrspace(1) %out, ptr
define amdgpu_kernel void @zextload_i8_to_i32_align4(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX8-LABEL: zextload_i8_to_i32_align4:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_load_dword s2, s[2:3], 0x0
; GFX8-NEXT: v_mov_b32_e32 v0, s0
Expand All @@ -183,7 +183,7 @@ define amdgpu_kernel void @zextload_i8_to_i32_align4(ptr addrspace(1) %out, ptr
;
; GFX9-LABEL: zextload_i8_to_i32_align4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_load_dword s0, s[6:7], 0x0
Expand All @@ -195,7 +195,7 @@ define amdgpu_kernel void @zextload_i8_to_i32_align4(ptr addrspace(1) %out, ptr
;
; GFX10-LABEL: zextload_i8_to_i32_align4:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_load_dword s0, s[6:7], 0x0
Expand All @@ -213,7 +213,7 @@ define amdgpu_kernel void @zextload_i8_to_i32_align4(ptr addrspace(1) %out, ptr
define amdgpu_kernel void @zextload_i16_to_i32_align4(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX8-LABEL: zextload_i16_to_i32_align4:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_load_dword s2, s[2:3], 0x0
; GFX8-NEXT: v_mov_b32_e32 v0, s0
Expand All @@ -226,7 +226,7 @@ define amdgpu_kernel void @zextload_i16_to_i32_align4(ptr addrspace(1) %out, ptr
;
; GFX9-LABEL: zextload_i16_to_i32_align4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_load_dword s0, s[6:7], 0x0
Expand All @@ -238,7 +238,7 @@ define amdgpu_kernel void @zextload_i16_to_i32_align4(ptr addrspace(1) %out, ptr
;
; GFX10-LABEL: zextload_i16_to_i32_align4:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_load_dword s0, s[6:7], 0x0
Expand All @@ -256,7 +256,7 @@ define amdgpu_kernel void @zextload_i16_to_i32_align4(ptr addrspace(1) %out, ptr
define amdgpu_kernel void @constant_load_i8_align2(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX8-LABEL: constant_load_i8_align2:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
Expand All @@ -269,7 +269,7 @@ define amdgpu_kernel void @constant_load_i8_align2(ptr addrspace(1) %out, ptr ad
;
; GFX9-LABEL: constant_load_i8_align2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_ubyte v1, v0, s[6:7]
Expand All @@ -279,7 +279,7 @@ define amdgpu_kernel void @constant_load_i8_align2(ptr addrspace(1) %out, ptr ad
;
; GFX10-LABEL: constant_load_i8_align2:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_load_ubyte v1, v0, s[6:7]
Expand All @@ -294,7 +294,7 @@ define amdgpu_kernel void @constant_load_i8_align2(ptr addrspace(1) %out, ptr ad
define amdgpu_kernel void @constant_load_i16_align2(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX8-LABEL: constant_load_i16_align2:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
Expand All @@ -307,7 +307,7 @@ define amdgpu_kernel void @constant_load_i16_align2(ptr addrspace(1) %out, ptr a
;
; GFX9-LABEL: constant_load_i16_align2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_ushort v1, v0, s[6:7]
Expand All @@ -317,7 +317,7 @@ define amdgpu_kernel void @constant_load_i16_align2(ptr addrspace(1) %out, ptr a
;
; GFX10-LABEL: constant_load_i16_align2:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_load_ushort v1, v0, s[6:7]
Expand All @@ -332,7 +332,7 @@ define amdgpu_kernel void @constant_load_i16_align2(ptr addrspace(1) %out, ptr a
define amdgpu_kernel void @constant_sextload_i8_align2(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX8-LABEL: constant_sextload_i8_align2:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
Expand All @@ -351,7 +351,7 @@ define amdgpu_kernel void @constant_sextload_i8_align2(ptr addrspace(1) %out, pt
;
; GFX9-LABEL: constant_sextload_i8_align2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_sbyte v1, v0, s[6:7]
Expand All @@ -362,7 +362,7 @@ define amdgpu_kernel void @constant_sextload_i8_align2(ptr addrspace(1) %out, pt
;
; GFX10-LABEL: constant_sextload_i8_align2:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_load_sbyte v1, v0, s[6:7]
Expand All @@ -379,7 +379,7 @@ define amdgpu_kernel void @constant_sextload_i8_align2(ptr addrspace(1) %out, pt
define amdgpu_kernel void @constant_zextload_i8_align2(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX8-LABEL: constant_zextload_i8_align2:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
Expand All @@ -398,7 +398,7 @@ define amdgpu_kernel void @constant_zextload_i8_align2(ptr addrspace(1) %out, pt
;
; GFX9-LABEL: constant_zextload_i8_align2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_ubyte v1, v0, s[6:7]
Expand All @@ -409,7 +409,7 @@ define amdgpu_kernel void @constant_zextload_i8_align2(ptr addrspace(1) %out, pt
;
; GFX10-LABEL: constant_zextload_i8_align2:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_load_ubyte v1, v0, s[6:7]
Expand Down
380 changes: 194 additions & 186 deletions llvm/test/CodeGen/AMDGPU/add.ll

Large diffs are not rendered by default.

224 changes: 122 additions & 102 deletions llvm/test/CodeGen/AMDGPU/add.v2i16.ll

Large diffs are not rendered by default.

26 changes: 13 additions & 13 deletions llvm/test/CodeGen/AMDGPU/amd.endpgm.ll
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,13 @@ define void @test1() {
define amdgpu_kernel void @test2(ptr %p, i32 %x) {
; GFX9-LABEL: test2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dword s2, s[0:1], 0x2c
; GFX9-NEXT: s_load_dword s0, s[2:3], 0x2c
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_cmp_lt_i32 s2, 1
; GFX9-NEXT: s_cmp_lt_i32 s0, 1
; GFX9-NEXT: s_cbranch_scc0 .LBB2_2
; GFX9-NEXT: ; %bb.1: ; %else
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
; GFX9-NEXT: v_mov_b32_e32 v2, s2
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x24
; GFX9-NEXT: v_mov_b32_e32 v2, s0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: v_mov_b32_e32 v1, s5
Expand All @@ -58,13 +58,13 @@ define amdgpu_kernel void @test2(ptr %p, i32 %x) {
;
; GFX10-LABEL: test2:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dword s2, s[0:1], 0x2c
; GFX10-NEXT: s_load_dword s0, s[2:3], 0x2c
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_cmp_lt_i32 s2, 1
; GFX10-NEXT: s_cmp_lt_i32 s0, 1
; GFX10-NEXT: s_cbranch_scc0 .LBB2_2
; GFX10-NEXT: ; %bb.1: ; %else
; GFX10-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
; GFX10-NEXT: v_mov_b32_e32 v2, s2
; GFX10-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x24
; GFX10-NEXT: v_mov_b32_e32 v2, s0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, s4
; GFX10-NEXT: v_mov_b32_e32 v1, s5
Expand All @@ -75,15 +75,15 @@ define amdgpu_kernel void @test2(ptr %p, i32 %x) {
;
; GFX11-LABEL: test2:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x2c
; GFX11-NEXT: s_load_b32 s0, s[2:3], 0x2c
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_cmp_lt_i32 s2, 1
; GFX11-NEXT: s_cmp_lt_i32 s0, 1
; GFX11-NEXT: s_cbranch_scc0 .LBB2_2
; GFX11-NEXT: ; %bb.1: ; %else
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT: v_mov_b32_e32 v2, s2
; GFX11-NEXT: s_load_b64 s[2:3], s[2:3], 0x24
; GFX11-NEXT: v_mov_b32_e32 v2, s0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-NEXT: flat_store_b32 v[0:1], v2
; GFX11-NEXT: s_endpgm
; GFX11-NEXT: .LBB2_2: ; %then
Expand Down
3,189 changes: 1,597 additions & 1,592 deletions llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll

Large diffs are not rendered by default.

38 changes: 19 additions & 19 deletions llvm/test/CodeGen/AMDGPU/anyext.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone
define amdgpu_kernel void @anyext_i1_i32(ptr addrspace(1) %out, i32 %cond) #0 {
; GCN-LABEL: anyext_i1_i32:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_dword s4, s[0:1], 0xb
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; GCN-NEXT: s_load_dword s4, s[2:3], 0xb
; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x9
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
Expand All @@ -22,8 +22,8 @@ define amdgpu_kernel void @anyext_i1_i32(ptr addrspace(1) %out, i32 %cond) #0 {
;
; GFX8-LABEL: anyext_i1_i32:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_load_dword s4, s[0:1], 0x2c
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX8-NEXT: s_load_dword s4, s[2:3], 0x2c
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GFX8-NEXT: s_mov_b32 s3, 0xf000
; GFX8-NEXT: s_mov_b32 s2, -1
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
Expand All @@ -37,17 +37,17 @@ define amdgpu_kernel void @anyext_i1_i32(ptr addrspace(1) %out, i32 %cond) #0 {
;
; GFX9-LABEL: anyext_i1_i32:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_load_dword s2, s[0:1], 0x2c
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24
; GFX9-NEXT: s_mov_b32 s7, 0xf000
; GFX9-NEXT: s_mov_b32 s6, -1
; GFX9-NEXT: s_load_dword s4, s[2:3], 0x2c
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_cmp_eq_u32 s2, 0
; GFX9-NEXT: s_cselect_b64 s[0:1], -1, 0
; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; GFX9-NEXT: s_cmp_eq_u32 s4, 0
; GFX9-NEXT: s_cselect_b64 s[4:5], -1, 0
; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX9-NEXT: v_not_b32_e32 v0, v0
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX9-NEXT: s_endpgm
entry:
%tmp = icmp eq i32 %cond, 0
Expand All @@ -62,8 +62,8 @@ entry:
define amdgpu_kernel void @s_anyext_i16_i32(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) #0 {
; GCN-LABEL: s_anyext_i16_i32:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd
; GCN-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x9
; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0xd
; GCN-NEXT: s_mov_b32 s11, 0xf000
; GCN-NEXT: s_mov_b32 s14, 0
; GCN-NEXT: s_mov_b32 s15, s11
Expand All @@ -88,8 +88,8 @@ define amdgpu_kernel void @s_anyext_i16_i32(ptr addrspace(1) %out, ptr addrspace
;
; GFX8-LABEL: s_anyext_i16_i32:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
; GFX8-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v3, s7
Expand All @@ -113,13 +113,13 @@ define amdgpu_kernel void @s_anyext_i16_i32(ptr addrspace(1) %out, ptr addrspace
;
; GFX9-LABEL: s_anyext_i16_i32:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 1, v1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_ushort v2, v0, s[6:7]
; GFX9-NEXT: global_load_ushort v3, v1, s[2:3]
; GFX9-NEXT: global_load_ushort v3, v1, s[0:1]
; GFX9-NEXT: s_mov_b32 s7, 0xf000
; GFX9-NEXT: s_mov_b32 s6, -1
; GFX9-NEXT: s_waitcnt vmcnt(0)
Expand Down
1,712 changes: 949 additions & 763 deletions llvm/test/CodeGen/AMDGPU/atomic_optimizations_buffer.ll

Large diffs are not rendered by default.

2,679 changes: 1,764 additions & 915 deletions llvm/test/CodeGen/AMDGPU/atomic_optimizations_global_pointer.ll

Large diffs are not rendered by default.

4,032 changes: 2,472 additions & 1,560 deletions llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll

Large diffs are not rendered by default.

1,359 changes: 754 additions & 605 deletions llvm/test/CodeGen/AMDGPU/atomic_optimizations_raw_buffer.ll

Large diffs are not rendered by default.

1,509 changes: 828 additions & 681 deletions llvm/test/CodeGen/AMDGPU/atomic_optimizations_struct_buffer.ll

Large diffs are not rendered by default.

48 changes: 24 additions & 24 deletions llvm/test/CodeGen/AMDGPU/atomics_cond_sub.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ declare i32 @llvm.amdgcn.atomic.cond.sub.u32.p0(ptr, i32)
define amdgpu_kernel void @flat_atomic_cond_sub_no_rtn_u32(ptr %addr, i32 %in) {
; GFX12-SDAG-LABEL: flat_atomic_cond_sub_no_rtn_u32:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_load_b96 s[0:2], s[0:1], 0x24
; GFX12-SDAG-NEXT: s_load_b96 s[0:2], s[2:3], 0x24
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, s2
Expand All @@ -18,7 +18,7 @@ define amdgpu_kernel void @flat_atomic_cond_sub_no_rtn_u32(ptr %addr, i32 %in) {
;
; GFX12-GISEL-LABEL: flat_atomic_cond_sub_no_rtn_u32:
; GFX12-GISEL: ; %bb.0: ; %entry
; GFX12-GISEL-NEXT: s_load_b96 s[0:2], s[0:1], 0x24
; GFX12-GISEL-NEXT: s_load_b96 s[0:2], s[2:3], 0x24
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX12-GISEL-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v1, s1
Expand All @@ -33,7 +33,7 @@ entry:
define amdgpu_kernel void @flat_atomic_cond_sub_no_rtn_u32_forced(ptr %addr, i32 %in) "target-features"="+atomic-csub-no-rtn-insts" {
; GFX12-SDAG-LABEL: flat_atomic_cond_sub_no_rtn_u32_forced:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_load_b96 s[0:2], s[0:1], 0x24
; GFX12-SDAG-NEXT: s_load_b96 s[0:2], s[2:3], 0x24
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, s2
Expand All @@ -42,7 +42,7 @@ define amdgpu_kernel void @flat_atomic_cond_sub_no_rtn_u32_forced(ptr %addr, i32
;
; GFX12-GISEL-LABEL: flat_atomic_cond_sub_no_rtn_u32_forced:
; GFX12-GISEL: ; %bb.0: ; %entry
; GFX12-GISEL-NEXT: s_load_b96 s[0:2], s[0:1], 0x24
; GFX12-GISEL-NEXT: s_load_b96 s[0:2], s[2:3], 0x24
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX12-GISEL-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v1, s1
Expand All @@ -58,8 +58,8 @@ define amdgpu_kernel void @flat_atomic_cond_sub_rtn_u32(ptr %addr, i32 %in, ptr
; GFX12-SDAG-LABEL: flat_atomic_cond_sub_rtn_u32:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_clause 0x1
; GFX12-SDAG-NEXT: s_load_b96 s[4:6], s[0:1], 0x24
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x34
; GFX12-SDAG-NEXT: s_load_b96 s[4:6], s[2:3], 0x24
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x34
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5
; GFX12-SDAG-NEXT: v_mov_b32_e32 v2, s6
Expand All @@ -72,8 +72,8 @@ define amdgpu_kernel void @flat_atomic_cond_sub_rtn_u32(ptr %addr, i32 %in, ptr
; GFX12-GISEL-LABEL: flat_atomic_cond_sub_rtn_u32:
; GFX12-GISEL: ; %bb.0: ; %entry
; GFX12-GISEL-NEXT: s_clause 0x1
; GFX12-GISEL-NEXT: s_load_b96 s[4:6], s[0:1], 0x24
; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x34
; GFX12-GISEL-NEXT: s_load_b96 s[4:6], s[2:3], 0x24
; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[2:3], 0x34
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX12-GISEL-NEXT: v_dual_mov_b32 v2, s6 :: v_dual_mov_b32 v1, s5
Expand All @@ -92,15 +92,15 @@ entry:
define amdgpu_kernel void @global_atomic_cond_sub_no_rtn_u32(ptr addrspace(1) %addr, i32 %in) {
; GFX12-SDAG-LABEL: global_atomic_cond_sub_no_rtn_u32:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_load_b96 s[0:2], s[0:1], 0x24
; GFX12-SDAG-NEXT: s_load_b96 s[0:2], s[2:3], 0x24
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX12-SDAG-NEXT: global_atomic_cond_sub_u32 v0, v0, v1, s[0:1] offset:-16 th:TH_ATOMIC_RETURN
; GFX12-SDAG-NEXT: s_endpgm
;
; GFX12-GISEL-LABEL: global_atomic_cond_sub_no_rtn_u32:
; GFX12-GISEL: ; %bb.0: ; %entry
; GFX12-GISEL-NEXT: s_load_b96 s[0:2], s[0:1], 0x24
; GFX12-GISEL-NEXT: s_load_b96 s[0:2], s[2:3], 0x24
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
; GFX12-GISEL-NEXT: global_atomic_cond_sub_u32 v0, v1, v0, s[0:1] offset:-16 th:TH_ATOMIC_RETURN
Expand All @@ -114,7 +114,7 @@ entry:
define amdgpu_kernel void @global_atomic_cond_sub_no_rtn_u32_forced(ptr addrspace(1) %addr, i32 %in) "target-features"="+atomic-csub-no-rtn-insts" {
; GFX12-SDAG-LABEL: global_atomic_cond_sub_no_rtn_u32_forced:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_load_b96 s[0:2], s[0:1], 0x24
; GFX12-SDAG-NEXT: s_load_b96 s[0:2], s[2:3], 0x24
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX12-SDAG-NEXT: global_atomic_cond_sub_u32 v0, v1, s[0:1] offset:-16
Expand All @@ -124,7 +124,7 @@ define amdgpu_kernel void @global_atomic_cond_sub_no_rtn_u32_forced(ptr addrspac
;
; GFX12-GISEL-LABEL: global_atomic_cond_sub_no_rtn_u32_forced:
; GFX12-GISEL: ; %bb.0: ; %entry
; GFX12-GISEL-NEXT: s_load_b96 s[0:2], s[0:1], 0x24
; GFX12-GISEL-NEXT: s_load_b96 s[0:2], s[2:3], 0x24
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
; GFX12-GISEL-NEXT: global_atomic_cond_sub_u32 v1, v0, s[0:1] offset:-16
Expand All @@ -140,11 +140,11 @@ entry:
define amdgpu_kernel void @global_atomic_cond_sub_rtn_u32(ptr addrspace(1) %addr, i32 %in, ptr addrspace(1) %use) {
; GFX12-SDAG-LABEL: global_atomic_cond_sub_rtn_u32:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_load_b96 s[4:6], s[0:1], 0x24
; GFX12-SDAG-NEXT: v_mov_b32_e32 v0, 0
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x34
; GFX12-SDAG-NEXT: s_clause 0x1
; GFX12-SDAG-NEXT: s_load_b96 s[4:6], s[2:3], 0x24
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x34
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT: v_mov_b32_e32 v1, s6
; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s6
; GFX12-SDAG-NEXT: global_atomic_cond_sub_u32 v1, v0, v1, s[4:5] offset:16 th:TH_ATOMIC_RETURN
; GFX12-SDAG-NEXT: s_wait_loadcnt 0x0
; GFX12-SDAG-NEXT: global_store_b32 v0, v1, s[0:1]
Expand All @@ -155,8 +155,8 @@ define amdgpu_kernel void @global_atomic_cond_sub_rtn_u32(ptr addrspace(1) %addr
; GFX12-GISEL-LABEL: global_atomic_cond_sub_rtn_u32:
; GFX12-GISEL: ; %bb.0: ; %entry
; GFX12-GISEL-NEXT: s_clause 0x1
; GFX12-GISEL-NEXT: s_load_b96 s[4:6], s[0:1], 0x24
; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x34
; GFX12-GISEL-NEXT: s_load_b96 s[4:6], s[2:3], 0x24
; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[2:3], 0x34
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s6
; GFX12-GISEL-NEXT: global_atomic_cond_sub_u32 v0, v1, v0, s[4:5] offset:16 th:TH_ATOMIC_RETURN
Expand All @@ -175,7 +175,7 @@ entry:
define amdgpu_kernel void @ds_cond_sub_no_rtn_u32(ptr addrspace(3) %addr, i32 %in) {
; GFX12-SDAG-LABEL: ds_cond_sub_no_rtn_u32:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT: s_add_co_i32 s0, s0, -16
; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
Expand All @@ -185,7 +185,7 @@ define amdgpu_kernel void @ds_cond_sub_no_rtn_u32(ptr addrspace(3) %addr, i32 %i
;
; GFX12-GISEL-LABEL: ds_cond_sub_no_rtn_u32:
; GFX12-GISEL: ; %bb.0: ; %entry
; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: s_add_co_u32 s0, s0, -16
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
Expand All @@ -201,7 +201,7 @@ entry:
define amdgpu_kernel void @ds_cond_sub_no_rtn_u32_forced(ptr addrspace(3) %addr, i32 %in) "target-features"="+atomic-csub-no-rtn-insts" {
; GFX12-SDAG-LABEL: ds_cond_sub_no_rtn_u32_forced:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT: s_add_co_i32 s0, s0, -16
; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
Expand All @@ -211,7 +211,7 @@ define amdgpu_kernel void @ds_cond_sub_no_rtn_u32_forced(ptr addrspace(3) %addr,
;
; GFX12-GISEL-LABEL: ds_cond_sub_no_rtn_u32_forced:
; GFX12-GISEL: ; %bb.0: ; %entry
; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: s_add_co_u32 s0, s0, -16
; GFX12-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
Expand All @@ -227,7 +227,7 @@ entry:
define amdgpu_kernel void @ds_cond_sub_rtn_u32(ptr addrspace(3) %addr, i32 %in, ptr addrspace(3) %use) {
; GFX12-SDAG-LABEL: ds_cond_sub_rtn_u32:
; GFX12-SDAG: ; %bb.0: ; %entry
; GFX12-SDAG-NEXT: s_load_b96 s[0:2], s[0:1], 0x24
; GFX12-SDAG-NEXT: s_load_b96 s[0:2], s[2:3], 0x24
; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX12-SDAG-NEXT: ds_cond_sub_rtn_u32 v0, v0, v1 offset:16
Expand All @@ -238,7 +238,7 @@ define amdgpu_kernel void @ds_cond_sub_rtn_u32(ptr addrspace(3) %addr, i32 %in,
;
; GFX12-GISEL-LABEL: ds_cond_sub_rtn_u32:
; GFX12-GISEL: ; %bb.0: ; %entry
; GFX12-GISEL-NEXT: s_load_b96 s[0:2], s[0:1], 0x24
; GFX12-GISEL-NEXT: s_load_b96 s[0:2], s[2:3], 0x24
; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT: v_dual_mov_b32 v0, s1 :: v_dual_mov_b32 v1, s0
; GFX12-GISEL-NEXT: ds_cond_sub_rtn_u32 v0, v1, v0 offset:16
Expand Down
36 changes: 18 additions & 18 deletions llvm/test/CodeGen/AMDGPU/bfe-combine.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,10 @@
define amdgpu_kernel void @bfe_combine8(ptr addrspace(1) nocapture %arg, i32 %x) {
; VI-LABEL: bfe_combine8:
; VI: ; %bb.0:
; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: s_load_dword s4, s[2:3], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v0
; VI-NEXT: v_add_u32_e32 v0, vcc, s4, v0
; VI-NEXT: v_bfe_u32 v0, v0, 8, 8
; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; VI-NEXT: v_mov_b32_e32 v1, s1
Expand All @@ -24,11 +24,11 @@ define amdgpu_kernel void @bfe_combine8(ptr addrspace(1) nocapture %arg, i32 %x)
;
; VI-SDWA-LABEL: bfe_combine8:
; VI-SDWA: ; %bb.0:
; VI-SDWA-NEXT: s_load_dword s2, s[0:1], 0x2c
; VI-SDWA-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-SDWA-NEXT: s_load_dword s4, s[2:3], 0x2c
; VI-SDWA-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; VI-SDWA-NEXT: v_mov_b32_e32 v1, 2
; VI-SDWA-NEXT: s_waitcnt lgkmcnt(0)
; VI-SDWA-NEXT: v_add_u32_e32 v0, vcc, s2, v0
; VI-SDWA-NEXT: v_add_u32_e32 v0, vcc, s4, v0
; VI-SDWA-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1
; VI-SDWA-NEXT: v_mov_b32_e32 v1, s1
; VI-SDWA-NEXT: v_add_u32_e32 v0, vcc, s0, v0
Expand All @@ -42,13 +42,13 @@ define amdgpu_kernel void @bfe_combine8(ptr addrspace(1) nocapture %arg, i32 %x)
;
; CI-LABEL: bfe_combine8:
; CI: ; %bb.0:
; CI-NEXT: s_load_dword s2, s[0:1], 0xb
; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; CI-NEXT: s_load_dword s4, s[2:3], 0xb
; CI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x9
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s6, 0
; CI-NEXT: s_mov_b32 s7, s3
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: v_add_i32_e32 v0, vcc, s2, v0
; CI-NEXT: v_add_i32_e32 v0, vcc, s4, v0
; CI-NEXT: v_lshrrev_b32_e32 v0, 6, v0
; CI-NEXT: s_mov_b64 s[4:5], s[0:1]
; CI-NEXT: v_and_b32_e32 v0, 0x3fc, v0
Expand All @@ -71,11 +71,11 @@ define amdgpu_kernel void @bfe_combine8(ptr addrspace(1) nocapture %arg, i32 %x)
define amdgpu_kernel void @bfe_combine16(ptr addrspace(1) nocapture %arg, i32 %x) {
; VI-LABEL: bfe_combine16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: s_load_dword s4, s[2:3], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; VI-NEXT: v_mov_b32_e32 v1, 0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v0
; VI-NEXT: v_add_u32_e32 v0, vcc, s4, v0
; VI-NEXT: v_bfe_u32 v0, v0, 16, 16
; VI-NEXT: v_lshlrev_b32_e32 v0, 15, v0
; VI-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
Expand All @@ -91,11 +91,11 @@ define amdgpu_kernel void @bfe_combine16(ptr addrspace(1) nocapture %arg, i32 %x
;
; VI-SDWA-LABEL: bfe_combine16:
; VI-SDWA: ; %bb.0:
; VI-SDWA-NEXT: s_load_dword s2, s[0:1], 0x2c
; VI-SDWA-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-SDWA-NEXT: s_load_dword s4, s[2:3], 0x2c
; VI-SDWA-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; VI-SDWA-NEXT: v_mov_b32_e32 v1, 15
; VI-SDWA-NEXT: s_waitcnt lgkmcnt(0)
; VI-SDWA-NEXT: v_add_u32_e32 v0, vcc, s2, v0
; VI-SDWA-NEXT: v_add_u32_e32 v0, vcc, s4, v0
; VI-SDWA-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; VI-SDWA-NEXT: v_mov_b32_e32 v1, 0
; VI-SDWA-NEXT: v_lshlrev_b64 v[0:1], 2, v[0:1]
Expand All @@ -111,13 +111,13 @@ define amdgpu_kernel void @bfe_combine16(ptr addrspace(1) nocapture %arg, i32 %x
;
; CI-LABEL: bfe_combine16:
; CI: ; %bb.0:
; CI-NEXT: s_load_dword s2, s[0:1], 0xb
; CI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; CI-NEXT: s_load_dword s4, s[2:3], 0xb
; CI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x9
; CI-NEXT: v_mov_b32_e32 v1, 0
; CI-NEXT: s_mov_b32 s3, 0xf000
; CI-NEXT: s_mov_b32 s6, 0
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: v_add_i32_e32 v0, vcc, s2, v0
; CI-NEXT: v_add_i32_e32 v0, vcc, s4, v0
; CI-NEXT: v_lshrrev_b32_e32 v0, 1, v0
; CI-NEXT: v_and_b32_e32 v0, 0x7fff8000, v0
; CI-NEXT: v_lshl_b64 v[0:1], v[0:1], 2
Expand Down
56 changes: 28 additions & 28 deletions llvm/test/CodeGen/AMDGPU/bfe-patterns.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
define amdgpu_kernel void @v_ubfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #1 {
; SI-LABEL: v_ubfe_sub_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, 0
; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
Expand All @@ -23,7 +23,7 @@ define amdgpu_kernel void @v_ubfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1
;
; VI-LABEL: v_ubfe_sub_i32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v1, s3
Expand Down Expand Up @@ -55,7 +55,7 @@ define amdgpu_kernel void @v_ubfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1
define amdgpu_kernel void @v_ubfe_sub_multi_use_shl_i32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #1 {
; SI-LABEL: v_ubfe_sub_multi_use_shl_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
; SI-NEXT: s_mov_b32 s6, 0
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
Expand All @@ -78,7 +78,7 @@ define amdgpu_kernel void @v_ubfe_sub_multi_use_shl_i32(ptr addrspace(1) %out, p
;
; VI-LABEL: v_ubfe_sub_multi_use_shl_i32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v1, s3
Expand Down Expand Up @@ -115,7 +115,7 @@ define amdgpu_kernel void @v_ubfe_sub_multi_use_shl_i32(ptr addrspace(1) %out, p
define amdgpu_kernel void @s_ubfe_sub_i32(ptr addrspace(1) %out, i32 %src, i32 %width) #1 {
; SI-LABEL: s_ubfe_sub_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, 0
; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
Expand All @@ -131,7 +131,7 @@ define amdgpu_kernel void @s_ubfe_sub_i32(ptr addrspace(1) %out, i32 %src, i32 %
;
; VI-LABEL: s_ubfe_sub_i32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v0
Expand All @@ -155,7 +155,7 @@ define amdgpu_kernel void @s_ubfe_sub_i32(ptr addrspace(1) %out, i32 %src, i32 %
define amdgpu_kernel void @s_ubfe_sub_multi_use_shl_i32(ptr addrspace(1) %out, i32 %src, i32 %width) #1 {
; SI-LABEL: s_ubfe_sub_multi_use_shl_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
; SI-NEXT: s_mov_b32 s6, 0
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
Expand All @@ -175,7 +175,7 @@ define amdgpu_kernel void @s_ubfe_sub_multi_use_shl_i32(ptr addrspace(1) %out, i
;
; VI-LABEL: s_ubfe_sub_multi_use_shl_i32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v0
Expand Down Expand Up @@ -203,7 +203,7 @@ define amdgpu_kernel void @s_ubfe_sub_multi_use_shl_i32(ptr addrspace(1) %out, i
define amdgpu_kernel void @v_sbfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #1 {
; SI-LABEL: v_sbfe_sub_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, 0
; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
Expand All @@ -221,7 +221,7 @@ define amdgpu_kernel void @v_sbfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1
;
; VI-LABEL: v_sbfe_sub_i32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v1, s3
Expand Down Expand Up @@ -253,7 +253,7 @@ define amdgpu_kernel void @v_sbfe_sub_i32(ptr addrspace(1) %out, ptr addrspace(1
define amdgpu_kernel void @v_sbfe_sub_multi_use_shl_i32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) #1 {
; SI-LABEL: v_sbfe_sub_multi_use_shl_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
; SI-NEXT: s_mov_b32 s6, 0
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
Expand All @@ -276,7 +276,7 @@ define amdgpu_kernel void @v_sbfe_sub_multi_use_shl_i32(ptr addrspace(1) %out, p
;
; VI-LABEL: v_sbfe_sub_multi_use_shl_i32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; VI-NEXT: v_lshlrev_b32_e32 v2, 2, v0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v1, s3
Expand Down Expand Up @@ -313,7 +313,7 @@ define amdgpu_kernel void @v_sbfe_sub_multi_use_shl_i32(ptr addrspace(1) %out, p
define amdgpu_kernel void @s_sbfe_sub_i32(ptr addrspace(1) %out, i32 %src, i32 %width) #1 {
; SI-LABEL: s_sbfe_sub_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, 0
; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
Expand All @@ -329,7 +329,7 @@ define amdgpu_kernel void @s_sbfe_sub_i32(ptr addrspace(1) %out, i32 %src, i32 %
;
; VI-LABEL: s_sbfe_sub_i32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v0
Expand All @@ -353,7 +353,7 @@ define amdgpu_kernel void @s_sbfe_sub_i32(ptr addrspace(1) %out, i32 %src, i32 %
define amdgpu_kernel void @s_sbfe_sub_multi_use_shl_i32(ptr addrspace(1) %out, i32 %src, i32 %width) #1 {
; SI-LABEL: s_sbfe_sub_multi_use_shl_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
; SI-NEXT: s_mov_b32 s6, 0
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
Expand All @@ -373,7 +373,7 @@ define amdgpu_kernel void @s_sbfe_sub_multi_use_shl_i32(ptr addrspace(1) %out, i
;
; VI-LABEL: s_sbfe_sub_multi_use_shl_i32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; VI-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v0
Expand Down Expand Up @@ -401,8 +401,8 @@ define amdgpu_kernel void @s_sbfe_sub_multi_use_shl_i32(ptr addrspace(1) %out, i
define amdgpu_kernel void @s_sbfe_or_shl_shl_uniform_i32(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1) {
; SI-LABEL: s_sbfe_or_shl_shl_uniform_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd
; SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x9
; SI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0xd
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_load_dword s2, s[6:7], 0x0
; SI-NEXT: s_load_dword s0, s[0:1], 0x0
Expand All @@ -417,8 +417,8 @@ define amdgpu_kernel void @s_sbfe_or_shl_shl_uniform_i32(ptr addrspace(1) %out,
;
; VI-LABEL: s_sbfe_or_shl_shl_uniform_i32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
; VI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; VI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_load_dword s2, s[6:7], 0x0
; VI-NEXT: s_load_dword s0, s[0:1], 0x0
Expand All @@ -444,8 +444,8 @@ define amdgpu_kernel void @s_sbfe_or_shl_shl_uniform_i32(ptr addrspace(1) %out,
define amdgpu_kernel void @s_sbfe_or_shl_shl_nonuniform_i32(ptr addrspace(1) %out, ptr addrspace(1) %x, ptr addrspace(1) %y) {
; SI-LABEL: s_sbfe_or_shl_shl_nonuniform_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd
; SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x9
; SI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0xd
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_load_dword s2, s[6:7], 0x0
; SI-NEXT: s_load_dword s0, s[0:1], 0x0
Expand All @@ -462,8 +462,8 @@ define amdgpu_kernel void @s_sbfe_or_shl_shl_nonuniform_i32(ptr addrspace(1) %ou
;
; VI-LABEL: s_sbfe_or_shl_shl_nonuniform_i32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
; VI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; VI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_load_dword s2, s[6:7], 0x0
; VI-NEXT: s_load_dword s0, s[0:1], 0x0
Expand Down Expand Up @@ -491,8 +491,8 @@ define amdgpu_kernel void @s_sbfe_or_shl_shl_nonuniform_i32(ptr addrspace(1) %ou
define amdgpu_kernel void @s_sbfe_or_shl_shl_toosmall_i32(ptr addrspace(1) %out, ptr addrspace(1) %x, ptr addrspace(1) %y) {
; SI-LABEL: s_sbfe_or_shl_shl_toosmall_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xd
; SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x9
; SI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0xd
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_load_dword s2, s[6:7], 0x0
; SI-NEXT: s_load_dword s0, s[0:1], 0x0
Expand All @@ -509,8 +509,8 @@ define amdgpu_kernel void @s_sbfe_or_shl_shl_toosmall_i32(ptr addrspace(1) %out,
;
; VI-LABEL: s_sbfe_or_shl_shl_toosmall_i32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34
; VI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; VI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_load_dword s2, s[6:7], 0x0
; VI-NEXT: s_load_dword s0, s[0:1], 0x0
Expand Down
236 changes: 115 additions & 121 deletions llvm/test/CodeGen/AMDGPU/bfi_int.ll

Large diffs are not rendered by default.

16 changes: 8 additions & 8 deletions llvm/test/CodeGen/AMDGPU/bfm.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
define amdgpu_kernel void @s_bfm_pattern(ptr addrspace(1) %out, i32 %x, i32 %y) #0 {
; SI-LABEL: s_bfm_pattern:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_bfm_b32 s2, s2, s3
Expand All @@ -18,7 +18,7 @@ define amdgpu_kernel void @s_bfm_pattern(ptr addrspace(1) %out, i32 %x, i32 %y)
;
; VI-LABEL: s_bfm_pattern:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_bfm_b32 s2, s2, s3
; VI-NEXT: v_mov_b32_e32 v0, s0
Expand All @@ -36,22 +36,22 @@ define amdgpu_kernel void @s_bfm_pattern(ptr addrspace(1) %out, i32 %x, i32 %y)
define amdgpu_kernel void @s_bfm_pattern_simple(ptr addrspace(1) %out, i32 %x) #0 {
; SI-LABEL: s_bfm_pattern_simple:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s2, s[0:1], 0xb
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT: s_load_dword s4, s[2:3], 0xb
; SI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_bfm_b32 s4, s2, 0
; SI-NEXT: s_bfm_b32 s4, s4, 0
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: v_mov_b32_e32 v0, s4
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: s_bfm_pattern_simple:
; VI: ; %bb.0:
; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: s_load_dword s4, s[2:3], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_bfm_b32 s2, s2, 0
; VI-NEXT: s_bfm_b32 s2, s4, 0
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: v_mov_b32_e32 v1, s1
; VI-NEXT: v_mov_b32_e32 v2, s2
Expand Down
162 changes: 89 additions & 73 deletions llvm/test/CodeGen/AMDGPU/bitreverse.ll

Large diffs are not rendered by default.

64 changes: 32 additions & 32 deletions llvm/test/CodeGen/AMDGPU/br_cc.f16.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,18 +6,18 @@
define amdgpu_kernel void @br_cc_f16(
; SI-LABEL: br_cc_f16:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
; SI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0xd
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_mov_b32 s10, s2
; SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x9
; SI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0xd
; SI-NEXT: s_mov_b32 s11, 0xf000
; SI-NEXT: s_mov_b32 s10, -1
; SI-NEXT: s_mov_b32 s2, s10
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b32 s0, s6
; SI-NEXT: s_mov_b32 s1, s7
; SI-NEXT: s_mov_b32 s11, s3
; SI-NEXT: buffer_load_ushort v0, off, s[0:3], 0 glc
; SI-NEXT: s_mov_b32 s8, s6
; SI-NEXT: s_mov_b32 s9, s7
; SI-NEXT: s_mov_b32 s3, s11
; SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 glc
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: buffer_load_ushort v1, off, s[8:11], 0 glc
; SI-NEXT: buffer_load_ushort v1, off, s[0:3], 0 glc
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
Expand All @@ -29,28 +29,28 @@ define amdgpu_kernel void @br_cc_f16(
; SI-NEXT: .LBB0_2: ; %two
; SI-NEXT: v_cvt_f16_f32_e32 v0, v1
; SI-NEXT: .LBB0_3: ; %one
; SI-NEXT: s_mov_b32 s6, s2
; SI-NEXT: s_mov_b32 s7, s3
; SI-NEXT: s_mov_b32 s6, s10
; SI-NEXT: s_mov_b32 s7, s11
; SI-NEXT: buffer_store_short v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: br_cc_f16:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; VI-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x34
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_mov_b32 s10, s2
; VI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; VI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
; VI-NEXT: s_mov_b32 s11, 0xf000
; VI-NEXT: s_mov_b32 s10, -1
; VI-NEXT: s_mov_b32 s2, s10
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_mov_b32 s0, s6
; VI-NEXT: s_mov_b32 s1, s7
; VI-NEXT: s_mov_b32 s11, s3
; VI-NEXT: buffer_load_ushort v0, off, s[0:3], 0 glc
; VI-NEXT: s_mov_b32 s8, s6
; VI-NEXT: s_mov_b32 s9, s7
; VI-NEXT: s_mov_b32 s3, s11
; VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 glc
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: buffer_load_ushort v1, off, s[8:11], 0 glc
; VI-NEXT: buffer_load_ushort v1, off, s[0:3], 0 glc
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_mov_b32 s6, s2
; VI-NEXT: s_mov_b32 s7, s3
; VI-NEXT: s_mov_b32 s6, s10
; VI-NEXT: s_mov_b32 s7, s11
; VI-NEXT: v_cmp_nlt_f16_e32 vcc, v0, v1
; VI-NEXT: s_cbranch_vccnz .LBB0_2
; VI-NEXT: ; %bb.1: ; %one
Expand All @@ -63,8 +63,8 @@ define amdgpu_kernel void @br_cc_f16(
; GFX11-LABEL: br_cc_f16:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
; GFX11-NEXT: s_load_b64 s[8:9], s[0:1], 0x34
; GFX11-NEXT: s_load_b128 s[4:7], s[2:3], 0x24
; GFX11-NEXT: s_load_b64 s[8:9], s[2:3], 0x34
; GFX11-NEXT: s_mov_b32 s2, -1
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-NEXT: s_mov_b32 s10, s2
Expand Down Expand Up @@ -111,7 +111,7 @@ two:
define amdgpu_kernel void @br_cc_f16_imm_a(
; SI-LABEL: br_cc_f16_imm_a:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
Expand All @@ -137,7 +137,7 @@ define amdgpu_kernel void @br_cc_f16_imm_a(
;
; VI-LABEL: br_cc_f16_imm_a:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
Expand All @@ -157,7 +157,7 @@ define amdgpu_kernel void @br_cc_f16_imm_a(
;
; GFX11-LABEL: br_cc_f16_imm_a:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
; GFX11-NEXT: s_mov_b32 s7, 0x31016000
; GFX11-NEXT: s_mov_b32 s6, -1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
Expand Down Expand Up @@ -195,7 +195,7 @@ two:
define amdgpu_kernel void @br_cc_f16_imm_b(
; SI-LABEL: br_cc_f16_imm_b:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
Expand All @@ -221,7 +221,7 @@ define amdgpu_kernel void @br_cc_f16_imm_b(
;
; VI-LABEL: br_cc_f16_imm_b:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
Expand All @@ -243,7 +243,7 @@ define amdgpu_kernel void @br_cc_f16_imm_b(
;
; GFX11-LABEL: br_cc_f16_imm_b:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
; GFX11-NEXT: s_mov_b32 s7, 0x31016000
; GFX11-NEXT: s_mov_b32 s6, -1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
Expand Down
42 changes: 21 additions & 21 deletions llvm/test/CodeGen/AMDGPU/bswap.ll
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ declare i48 @llvm.bswap.i48(i48) #1
define amdgpu_kernel void @test_bswap_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: test_bswap_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_load_dword s4, s[2:3], 0x0
; SI-NEXT: s_mov_b32 s3, 0xf000
Expand All @@ -34,7 +34,7 @@ define amdgpu_kernel void @test_bswap_i32(ptr addrspace(1) %out, ptr addrspace(1
;
; VI-LABEL: test_bswap_i32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; VI-NEXT: v_mov_b32_e32 v0, 0x10203
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
Expand All @@ -49,7 +49,7 @@ define amdgpu_kernel void @test_bswap_i32(ptr addrspace(1) %out, ptr addrspace(1
;
; GFX11-LABEL: test_bswap_i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_load_b32 s2, s[2:3], 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
Expand All @@ -69,7 +69,7 @@ define amdgpu_kernel void @test_bswap_i32(ptr addrspace(1) %out, ptr addrspace(1
define amdgpu_kernel void @test_bswap_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: test_bswap_v2i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
; SI-NEXT: s_mov_b32 s3, 0xf000
Expand All @@ -87,7 +87,7 @@ define amdgpu_kernel void @test_bswap_v2i32(ptr addrspace(1) %out, ptr addrspace
;
; VI-LABEL: test_bswap_v2i32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; VI-NEXT: v_mov_b32_e32 v0, 0x10203
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
Expand All @@ -103,7 +103,7 @@ define amdgpu_kernel void @test_bswap_v2i32(ptr addrspace(1) %out, ptr addrspace
;
; GFX11-LABEL: test_bswap_v2i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_load_b64 s[4:5], s[2:3], 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
Expand All @@ -124,7 +124,7 @@ define amdgpu_kernel void @test_bswap_v2i32(ptr addrspace(1) %out, ptr addrspace
define amdgpu_kernel void @test_bswap_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: test_bswap_v4i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
; SI-NEXT: s_mov_b32 s3, 0xf000
Expand All @@ -148,7 +148,7 @@ define amdgpu_kernel void @test_bswap_v4i32(ptr addrspace(1) %out, ptr addrspace
;
; VI-LABEL: test_bswap_v4i32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; VI-NEXT: v_mov_b32_e32 v0, 0x10203
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
Expand All @@ -166,7 +166,7 @@ define amdgpu_kernel void @test_bswap_v4i32(ptr addrspace(1) %out, ptr addrspace
;
; GFX11-LABEL: test_bswap_v4i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_load_b128 s[4:7], s[2:3], 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
Expand All @@ -189,7 +189,7 @@ define amdgpu_kernel void @test_bswap_v4i32(ptr addrspace(1) %out, ptr addrspace
define amdgpu_kernel void @test_bswap_v8i32(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: test_bswap_v8i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0
; SI-NEXT: s_mov_b32 s3, 0xf000
Expand Down Expand Up @@ -226,7 +226,7 @@ define amdgpu_kernel void @test_bswap_v8i32(ptr addrspace(1) %out, ptr addrspace
;
; VI-LABEL: test_bswap_v8i32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x24
; VI-NEXT: s_load_dwordx4 s[8:11], s[2:3], 0x24
; VI-NEXT: v_mov_b32_e32 v4, 0x10203
; VI-NEXT: s_mov_b32 s15, 0xf000
; VI-NEXT: s_mov_b32 s14, -1
Expand All @@ -249,7 +249,7 @@ define amdgpu_kernel void @test_bswap_v8i32(ptr addrspace(1) %out, ptr addrspace
;
; GFX11-LABEL: test_bswap_v8i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b128 s[8:11], s[0:1], 0x24
; GFX11-NEXT: s_load_b128 s[8:11], s[2:3], 0x24
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_load_b256 s[0:7], s[10:11], 0x0
; GFX11-NEXT: s_mov_b32 s11, 0x31016000
Expand Down Expand Up @@ -278,7 +278,7 @@ define amdgpu_kernel void @test_bswap_v8i32(ptr addrspace(1) %out, ptr addrspace
define amdgpu_kernel void @test_bswap_i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: test_bswap_i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
; SI-NEXT: s_mov_b32 s3, 0xf000
Expand All @@ -296,7 +296,7 @@ define amdgpu_kernel void @test_bswap_i64(ptr addrspace(1) %out, ptr addrspace(1
;
; VI-LABEL: test_bswap_i64:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; VI-NEXT: v_mov_b32_e32 v0, 0x10203
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
Expand All @@ -312,7 +312,7 @@ define amdgpu_kernel void @test_bswap_i64(ptr addrspace(1) %out, ptr addrspace(1
;
; GFX11-LABEL: test_bswap_i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_load_b64 s[4:5], s[2:3], 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
Expand All @@ -333,7 +333,7 @@ define amdgpu_kernel void @test_bswap_i64(ptr addrspace(1) %out, ptr addrspace(1
define amdgpu_kernel void @test_bswap_v2i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: test_bswap_v2i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
; SI-NEXT: s_mov_b32 s3, 0xf000
Expand All @@ -357,7 +357,7 @@ define amdgpu_kernel void @test_bswap_v2i64(ptr addrspace(1) %out, ptr addrspace
;
; VI-LABEL: test_bswap_v2i64:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; VI-NEXT: v_mov_b32_e32 v0, 0x10203
; VI-NEXT: s_mov_b32 s7, 0xf000
; VI-NEXT: s_mov_b32 s6, -1
Expand All @@ -375,7 +375,7 @@ define amdgpu_kernel void @test_bswap_v2i64(ptr addrspace(1) %out, ptr addrspace
;
; GFX11-LABEL: test_bswap_v2i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_load_b128 s[4:7], s[2:3], 0x0
; GFX11-NEXT: s_mov_b32 s3, 0x31016000
Expand All @@ -398,7 +398,7 @@ define amdgpu_kernel void @test_bswap_v2i64(ptr addrspace(1) %out, ptr addrspace
define amdgpu_kernel void @test_bswap_v4i64(ptr addrspace(1) %out, ptr addrspace(1) %in) nounwind {
; SI-LABEL: test_bswap_v4i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0
; SI-NEXT: s_mov_b32 s3, 0xf000
Expand Down Expand Up @@ -435,7 +435,7 @@ define amdgpu_kernel void @test_bswap_v4i64(ptr addrspace(1) %out, ptr addrspace
;
; VI-LABEL: test_bswap_v4i64:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[8:11], s[0:1], 0x24
; VI-NEXT: s_load_dwordx4 s[8:11], s[2:3], 0x24
; VI-NEXT: v_mov_b32_e32 v4, 0x10203
; VI-NEXT: s_mov_b32 s15, 0xf000
; VI-NEXT: s_mov_b32 s14, -1
Expand All @@ -458,7 +458,7 @@ define amdgpu_kernel void @test_bswap_v4i64(ptr addrspace(1) %out, ptr addrspace
;
; GFX11-LABEL: test_bswap_v4i64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b128 s[8:11], s[0:1], 0x24
; GFX11-NEXT: s_load_b128 s[8:11], s[2:3], 0x24
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_load_b256 s[0:7], s[10:11], 0x0
; GFX11-NEXT: s_mov_b32 s11, 0x31016000
Expand Down
80 changes: 40 additions & 40 deletions llvm/test/CodeGen/AMDGPU/build_vector.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
define amdgpu_kernel void @build_vector2 (ptr addrspace(1) %out) {
; GFX6-LABEL: build_vector2:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x9
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: v_mov_b32_e32 v0, 5
Expand All @@ -19,7 +19,7 @@ define amdgpu_kernel void @build_vector2 (ptr addrspace(1) %out) {
;
; GFX8-LABEL: build_vector2:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GFX8-NEXT: v_mov_b32_e32 v0, 5
; GFX8-NEXT: v_mov_b32_e32 v1, 6
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
Expand All @@ -30,7 +30,7 @@ define amdgpu_kernel void @build_vector2 (ptr addrspace(1) %out) {
;
; GFX10-LABEL: build_vector2:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
; GFX10-NEXT: v_mov_b32_e32 v2, 0
; GFX10-NEXT: v_mov_b32_e32 v0, 5
; GFX10-NEXT: v_mov_b32_e32 v1, 6
Expand All @@ -40,7 +40,7 @@ define amdgpu_kernel void @build_vector2 (ptr addrspace(1) %out) {
;
; GFX11-LABEL: build_vector2:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x0
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: v_mov_b32_e32 v0, 5
; GFX11-NEXT: v_mov_b32_e32 v1, 6
Expand All @@ -52,12 +52,12 @@ define amdgpu_kernel void @build_vector2 (ptr addrspace(1) %out) {
;
; GFX940-LABEL: build_vector2:
; GFX940: ; %bb.0: ; %entry
; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX940-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GFX940-NEXT: v_mov_b32_e32 v2, 0
; GFX940-NEXT: v_mov_b32_e32 v0, 5
; GFX940-NEXT: v_mov_b32_e32 v1, 6
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[2:3] sc0 sc1
; GFX940-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] sc0 sc1
; GFX940-NEXT: s_endpgm
entry:
store <2 x i32> <i32 5, i32 6>, ptr addrspace(1) %out
Expand All @@ -67,7 +67,7 @@ entry:
define amdgpu_kernel void @build_vector4 (ptr addrspace(1) %out) {
; GFX6-LABEL: build_vector4:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x9
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: v_mov_b32_e32 v0, 5
Expand All @@ -80,7 +80,7 @@ define amdgpu_kernel void @build_vector4 (ptr addrspace(1) %out) {
;
; GFX8-LABEL: build_vector4:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GFX8-NEXT: v_mov_b32_e32 v0, 5
; GFX8-NEXT: v_mov_b32_e32 v1, 6
; GFX8-NEXT: v_mov_b32_e32 v2, 7
Expand All @@ -93,7 +93,7 @@ define amdgpu_kernel void @build_vector4 (ptr addrspace(1) %out) {
;
; GFX10-LABEL: build_vector4:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
; GFX10-NEXT: v_mov_b32_e32 v4, 0
; GFX10-NEXT: v_mov_b32_e32 v0, 5
; GFX10-NEXT: v_mov_b32_e32 v1, 6
Expand All @@ -105,7 +105,7 @@ define amdgpu_kernel void @build_vector4 (ptr addrspace(1) %out) {
;
; GFX11-LABEL: build_vector4:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x0
; GFX11-NEXT: v_mov_b32_e32 v4, 0
; GFX11-NEXT: v_mov_b32_e32 v0, 5
; GFX11-NEXT: v_mov_b32_e32 v1, 6
Expand All @@ -119,14 +119,14 @@ define amdgpu_kernel void @build_vector4 (ptr addrspace(1) %out) {
;
; GFX940-LABEL: build_vector4:
; GFX940: ; %bb.0: ; %entry
; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX940-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GFX940-NEXT: v_mov_b32_e32 v4, 0
; GFX940-NEXT: v_mov_b32_e32 v0, 5
; GFX940-NEXT: v_mov_b32_e32 v1, 6
; GFX940-NEXT: v_mov_b32_e32 v2, 7
; GFX940-NEXT: v_mov_b32_e32 v3, 8
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[2:3] sc0 sc1
; GFX940-NEXT: global_store_dwordx4 v4, v[0:3], s[0:1] sc0 sc1
; GFX940-NEXT: s_endpgm
entry:
store <4 x i32> <i32 5, i32 6, i32 7, i32 8>, ptr addrspace(1) %out
Expand All @@ -136,7 +136,7 @@ entry:
define amdgpu_kernel void @build_vector_v2i16 (ptr addrspace(1) %out) {
; GFX6-LABEL: build_vector_v2i16:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x9
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: v_mov_b32_e32 v0, 0x60005
Expand All @@ -146,7 +146,7 @@ define amdgpu_kernel void @build_vector_v2i16 (ptr addrspace(1) %out) {
;
; GFX8-LABEL: build_vector_v2i16:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GFX8-NEXT: v_mov_b32_e32 v2, 0x60005
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s0
Expand All @@ -156,7 +156,7 @@ define amdgpu_kernel void @build_vector_v2i16 (ptr addrspace(1) %out) {
;
; GFX10-LABEL: build_vector_v2i16:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: v_mov_b32_e32 v1, 0x60005
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
Expand All @@ -165,7 +165,7 @@ define amdgpu_kernel void @build_vector_v2i16 (ptr addrspace(1) %out) {
;
; GFX11-LABEL: build_vector_v2i16:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x0
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: v_mov_b32_e32 v1, 0x60005
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
Expand All @@ -176,11 +176,11 @@ define amdgpu_kernel void @build_vector_v2i16 (ptr addrspace(1) %out) {
;
; GFX940-LABEL: build_vector_v2i16:
; GFX940: ; %bb.0: ; %entry
; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX940-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GFX940-NEXT: v_mov_b32_e32 v0, 0
; GFX940-NEXT: v_mov_b32_e32 v1, 0x60005
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
; GFX940-NEXT: s_endpgm
entry:
store <2 x i16> <i16 5, i16 6>, ptr addrspace(1) %out
Expand All @@ -190,8 +190,8 @@ entry:
define amdgpu_kernel void @build_vector_v2i16_trunc (ptr addrspace(1) %out, i32 %a) {
; GFX6-LABEL: build_vector_v2i16_trunc:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_load_dword s4, s[0:1], 0xb
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX6-NEXT: s_load_dword s4, s[2:3], 0xb
; GFX6-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x9
; GFX6-NEXT: s_mov_b32 s3, 0xf000
; GFX6-NEXT: s_mov_b32 s2, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
Expand All @@ -201,10 +201,10 @@ define amdgpu_kernel void @build_vector_v2i16_trunc (ptr addrspace(1) %out, i32
;
; GFX8-LABEL: build_vector_v2i16_trunc:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dword s2, s[0:1], 0x2c
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX8-NEXT: s_load_dword s4, s[2:3], 0x2c
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_lshr_b32 s2, s2, 16
; GFX8-NEXT: s_lshr_b32 s2, s4, 16
; GFX8-NEXT: s_or_b32 s2, s2, 0x50000
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
Expand All @@ -215,11 +215,11 @@ define amdgpu_kernel void @build_vector_v2i16_trunc (ptr addrspace(1) %out, i32
; GFX10-LABEL: build_vector_v2i16_trunc:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: s_load_dword s2, s[0:1], 0x8
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
; GFX10-NEXT: s_load_dword s4, s[2:3], 0x8
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_lshr_b32 s2, s2, 16
; GFX10-NEXT: s_lshr_b32 s2, s4, 16
; GFX10-NEXT: s_pack_ll_b32_b16 s2, s2, 5
; GFX10-NEXT: v_mov_b32_e32 v1, s2
; GFX10-NEXT: global_store_dword v0, v1, s[0:1]
Expand All @@ -228,11 +228,11 @@ define amdgpu_kernel void @build_vector_v2i16_trunc (ptr addrspace(1) %out, i32
; GFX11-LABEL: build_vector_v2i16_trunc:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: s_load_b32 s2, s[0:1], 0x8
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_load_b32 s4, s[2:3], 0x8
; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x0
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_pack_hl_b32_b16 s2, s2, 5
; GFX11-NEXT: s_pack_hl_b32_b16 s2, s4, 5
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-NEXT: v_mov_b32_e32 v1, s2
; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
Expand All @@ -242,14 +242,14 @@ define amdgpu_kernel void @build_vector_v2i16_trunc (ptr addrspace(1) %out, i32
;
; GFX940-LABEL: build_vector_v2i16_trunc:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_load_dword s4, s[0:1], 0x2c
; GFX940-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x24
; GFX940-NEXT: s_load_dword s4, s[2:3], 0x2c
; GFX940-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GFX940-NEXT: v_mov_b32_e32 v0, 0
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NEXT: s_lshr_b32 s0, s4, 16
; GFX940-NEXT: s_pack_ll_b32_b16 s0, s0, 5
; GFX940-NEXT: v_mov_b32_e32 v1, s0
; GFX940-NEXT: global_store_dword v0, v1, s[2:3] sc0 sc1
; GFX940-NEXT: s_lshr_b32 s2, s4, 16
; GFX940-NEXT: s_pack_ll_b32_b16 s2, s2, 5
; GFX940-NEXT: v_mov_b32_e32 v1, s2
; GFX940-NEXT: global_store_dword v0, v1, s[0:1] sc0 sc1
; GFX940-NEXT: s_endpgm
%srl = lshr i32 %a, 16
%trunc = trunc i32 %srl to i16
Expand All @@ -262,7 +262,7 @@ define amdgpu_kernel void @build_vector_v2i16_trunc (ptr addrspace(1) %out, i32
define amdgpu_kernel void @build_v2i32_from_v4i16_shuffle(ptr addrspace(1) %out, <4 x i16> %in) {
; GFX6-LABEL: build_v2i32_from_v4i16_shuffle:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s6, -1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
Expand All @@ -277,7 +277,7 @@ define amdgpu_kernel void @build_v2i32_from_v4i16_shuffle(ptr addrspace(1) %out,
;
; GFX8-LABEL: build_v2i32_from_v4i16_shuffle:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_lshl_b32 s3, s3, 16
; GFX8-NEXT: s_lshl_b32 s2, s2, 16
Expand All @@ -290,7 +290,7 @@ define amdgpu_kernel void @build_v2i32_from_v4i16_shuffle(ptr addrspace(1) %out,
;
; GFX10-LABEL: build_v2i32_from_v4i16_shuffle:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
; GFX10-NEXT: v_mov_b32_e32 v2, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_lshl_b32 s2, s2, 16
Expand All @@ -302,7 +302,7 @@ define amdgpu_kernel void @build_v2i32_from_v4i16_shuffle(ptr addrspace(1) %out,
;
; GFX11-LABEL: build_v2i32_from_v4i16_shuffle:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x0
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_lshl_b32 s2, s2, 16
Expand All @@ -316,7 +316,7 @@ define amdgpu_kernel void @build_v2i32_from_v4i16_shuffle(ptr addrspace(1) %out,
;
; GFX940-LABEL: build_v2i32_from_v4i16_shuffle:
; GFX940: ; %bb.0: ; %entry
; GFX940-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX940-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX940-NEXT: v_mov_b32_e32 v2, 0
; GFX940-NEXT: s_waitcnt lgkmcnt(0)
; GFX940-NEXT: s_lshl_b32 s0, s7, 16
Expand Down
Loading