15 changes: 13 additions & 2 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.queue.ptr.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,20 @@

; FIXME: This should produce an error on non-HSA targets.

; GCN-LABEL: {{^}}test:
; GCN-LABEL: {{^}}queue_ptr:
; GCN: s_load_dword s{{[0-9]+}}, s[6:7], 0x0
; GCN: .amdhsa_user_sgpr_queue_ptr 1
; Kernel that calls llvm.amdgcn.queue.ptr, loads one i32 through the returned
; pointer and stores it to %out. It carries no function attribute group, in
; contrast to @queue_ptr_opt, which is defined with #1.
define amdgpu_kernel void @queue_ptr(ptr addrspace(1) %out) {
; Obtain the queue pointer (an addrspace(4) pointer) from the intrinsic.
%queue_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0
; Read a single dword through it and write that value to the output pointer.
%value = load i32, ptr addrspace(4) %queue_ptr
store i32 %value, ptr addrspace(1) %out
ret void
}

; GCN-LABEL: {{^}}queue_ptr_opt:
; GCN: s_load_dword s{{[0-9]+}}, s[4:5], 0x0
; GCN: .amdhsa_user_sgpr_queue_ptr 1
define amdgpu_kernel void @test(ptr addrspace(1) %out) {
define amdgpu_kernel void @queue_ptr_opt(ptr addrspace(1) %out) #1 {
%queue_ptr = call noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0
%value = load i32, ptr addrspace(4) %queue_ptr
store i32 %value, ptr addrspace(1) %out
Expand All @@ -15,6 +25,7 @@ define amdgpu_kernel void @test(ptr addrspace(1) %out) {
; Intrinsic under test: takes no arguments and returns an addrspace(4) pointer.
declare noalias ptr addrspace(4) @llvm.amdgcn.queue.ptr() #0

; #0 is attached to the intrinsic declaration and to the calls of it.
attributes #0 = { nounwind readnone }
; #1 is attached to @queue_ptr_opt. NOTE(review): presumably removing the
; dispatch pointer is what moves the expected queue-pointer SGPR pair from
; s[6:7] to s[4:5] in that kernel's checks -- confirm against the backend docs.
attributes #1 = { "amdgpu-no-dispatch-ptr" }

; Module flag that sets amdhsa_code_object_version to 400.
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}
98 changes: 49 additions & 49 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll

Large diffs are not rendered by default.

86 changes: 43 additions & 43 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@
define amdgpu_kernel void @set_inactive(ptr addrspace(1) %out, i32 %in) {
; GCN-LABEL: set_inactive:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dword s3, s[0:1], 0x2c
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-NEXT: s_load_dword s4, s[2:3], 0x2c
; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, s3
; GCN-NEXT: v_mov_b32_e32 v0, s4
; GCN-NEXT: s_not_b64 exec, exec
; GCN-NEXT: v_mov_b32_e32 v0, 42
; GCN-NEXT: s_not_b64 exec, exec
Expand All @@ -23,7 +23,7 @@ define amdgpu_kernel void @set_inactive(ptr addrspace(1) %out, i32 %in) {
define amdgpu_kernel void @set_inactive_64(ptr addrspace(1) %out, i64 %in) {
; GCN-LABEL: set_inactive_64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, s2
; GCN-NEXT: v_mov_b32_e32 v1, s3
Expand All @@ -43,20 +43,20 @@ define amdgpu_kernel void @set_inactive_64(ptr addrspace(1) %out, i64 %in) {
define amdgpu_kernel void @set_inactive_scc(ptr addrspace(1) %out, i32 %in, <4 x i32> inreg %desc) {
; GCN-LABEL: set_inactive_scc:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
; GCN-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x34
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_buffer_load_dword s2, s[4:7], 0x0
; GCN-NEXT: s_load_dword s3, s[0:1], 0x2c
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-NEXT: s_buffer_load_dword s4, s[4:7], 0x0
; GCN-NEXT: s_load_dword s5, s[2:3], 0x2c
; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_cmp_lg_u32 s2, 56
; GCN-NEXT: s_cselect_b32 s4, 1, 0
; GCN-NEXT: v_mov_b32_e32 v0, s3
; GCN-NEXT: s_cmp_lg_u32 s4, 56
; GCN-NEXT: s_cselect_b32 s3, 1, 0
; GCN-NEXT: v_mov_b32_e32 v0, s5
; GCN-NEXT: s_not_b64 exec, exec
; GCN-NEXT: v_mov_b32_e32 v0, 42
; GCN-NEXT: s_not_b64 exec, exec
; GCN-NEXT: s_mov_b32 s2, 1
; GCN-NEXT: s_cmp_lg_u32 s4, 0
; GCN-NEXT: s_cmp_lg_u32 s3, 0
; GCN-NEXT: s_cbranch_scc0 .LBB2_2
; GCN-NEXT: ; %bb.1: ; %.one
; GCN-NEXT: v_add_u32_e32 v1, vcc, 1, v0
Expand Down Expand Up @@ -96,12 +96,12 @@ define amdgpu_kernel void @set_inactive_scc(ptr addrspace(1) %out, i32 %in, <4 x
define amdgpu_kernel void @set_inactive_f32(ptr addrspace(1) %out, float %in) {
; GCN-LABEL: set_inactive_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dword s3, s[0:1], 0x2c
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-NEXT: s_load_dword s4, s[2:3], 0x2c
; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GCN-NEXT: v_mov_b32_e32 v1, 0x40400000
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, s3
; GCN-NEXT: v_mov_b32_e32 v0, s4
; GCN-NEXT: s_not_b64 exec, exec
; GCN-NEXT: v_mov_b32_e32 v0, v1
; GCN-NEXT: s_not_b64 exec, exec
Expand All @@ -116,7 +116,7 @@ define amdgpu_kernel void @set_inactive_f32(ptr addrspace(1) %out, float %in) {
define amdgpu_kernel void @set_inactive_f64(ptr addrspace(1) %out, double %in) {
; GCN-LABEL: set_inactive_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GCN-NEXT: s_mov_b32 s4, 0xcccccccd
; GCN-NEXT: s_mov_b32 s5, 0x4010cccc
; GCN-NEXT: v_mov_b32_e32 v2, s4
Expand All @@ -140,12 +140,12 @@ define amdgpu_kernel void @set_inactive_f64(ptr addrspace(1) %out, double %in) {
define amdgpu_kernel void @set_inactive_v2i16(ptr addrspace(1) %out, <2 x i16> %in) {
; GCN-LABEL: set_inactive_v2i16:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dword s3, s[0:1], 0x2c
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-NEXT: s_load_dword s4, s[2:3], 0x2c
; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GCN-NEXT: v_mov_b32_e32 v1, 0x10001
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, s3
; GCN-NEXT: v_mov_b32_e32 v0, s4
; GCN-NEXT: s_not_b64 exec, exec
; GCN-NEXT: v_mov_b32_e32 v0, v1
; GCN-NEXT: s_not_b64 exec, exec
Expand All @@ -160,12 +160,12 @@ define amdgpu_kernel void @set_inactive_v2i16(ptr addrspace(1) %out, <2 x i16> %
define amdgpu_kernel void @set_inactive_v2f16(ptr addrspace(1) %out, <2 x half> %in) {
; GCN-LABEL: set_inactive_v2f16:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dword s3, s[0:1], 0x2c
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-NEXT: s_load_dword s4, s[2:3], 0x2c
; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GCN-NEXT: v_mov_b32_e32 v1, 0x3c003c00
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, s3
; GCN-NEXT: v_mov_b32_e32 v0, s4
; GCN-NEXT: s_not_b64 exec, exec
; GCN-NEXT: v_mov_b32_e32 v0, v1
; GCN-NEXT: s_not_b64 exec, exec
Expand All @@ -180,7 +180,7 @@ define amdgpu_kernel void @set_inactive_v2f16(ptr addrspace(1) %out, <2 x half>
define amdgpu_kernel void @set_inactive_v2i32(ptr addrspace(1) %out, <2 x i32> %in) {
; GCN-LABEL: set_inactive_v2i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GCN-NEXT: s_mov_b32 s4, 1
; GCN-NEXT: s_mov_b32 s5, s4
; GCN-NEXT: v_mov_b32_e32 v2, s4
Expand All @@ -204,7 +204,7 @@ define amdgpu_kernel void @set_inactive_v2i32(ptr addrspace(1) %out, <2 x i32> %
define amdgpu_kernel void @set_inactive_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
; GCN-LABEL: set_inactive_v2f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GCN-NEXT: s_mov_b32 s4, 1.0
; GCN-NEXT: s_mov_b32 s5, s4
; GCN-NEXT: v_mov_b32_e32 v2, s4
Expand All @@ -228,12 +228,12 @@ define amdgpu_kernel void @set_inactive_v2f32(ptr addrspace(1) %out, <2 x float>
define amdgpu_kernel void @set_inactive_v2bf16(ptr addrspace(1) %out, <2 x bfloat> %in) {
; GCN-LABEL: set_inactive_v2bf16:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dword s3, s[0:1], 0x2c
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-NEXT: s_load_dword s4, s[2:3], 0x2c
; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GCN-NEXT: v_mov_b32_e32 v1, 0x3f803f80
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, s3
; GCN-NEXT: v_mov_b32_e32 v0, s4
; GCN-NEXT: s_not_b64 exec, exec
; GCN-NEXT: v_mov_b32_e32 v0, v1
; GCN-NEXT: s_not_b64 exec, exec
Expand All @@ -248,7 +248,7 @@ define amdgpu_kernel void @set_inactive_v2bf16(ptr addrspace(1) %out, <2 x bfloa
define amdgpu_kernel void @set_inactive_v4i16(ptr addrspace(1) %out, <4 x i16> %in) {
; GCN-LABEL: set_inactive_v4i16:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GCN-NEXT: s_mov_b32 s4, 0x10001
; GCN-NEXT: s_mov_b32 s5, s4
; GCN-NEXT: v_mov_b32_e32 v2, s4
Expand All @@ -272,7 +272,7 @@ define amdgpu_kernel void @set_inactive_v4i16(ptr addrspace(1) %out, <4 x i16> %
define amdgpu_kernel void @set_inactive_v4f16(ptr addrspace(1) %out, <4 x half> %in) {
; GCN-LABEL: set_inactive_v4f16:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GCN-NEXT: s_mov_b32 s4, 0x3c003c00
; GCN-NEXT: s_mov_b32 s5, s4
; GCN-NEXT: v_mov_b32_e32 v2, s4
Expand All @@ -296,7 +296,7 @@ define amdgpu_kernel void @set_inactive_v4f16(ptr addrspace(1) %out, <4 x half>
define amdgpu_kernel void @set_inactive_v4bf16(ptr addrspace(1) %out, <4 x bfloat> %in) {
; GCN-LABEL: set_inactive_v4bf16:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GCN-NEXT: s_mov_b32 s4, 0x3f803f80
; GCN-NEXT: s_mov_b32 s5, s4
; GCN-NEXT: v_mov_b32_e32 v2, s4
Expand All @@ -320,7 +320,7 @@ define amdgpu_kernel void @set_inactive_v4bf16(ptr addrspace(1) %out, <4 x bfloa
define amdgpu_kernel void @set_inactive_p0(ptr addrspace(1) %out, ptr %in) {
; GCN-LABEL: set_inactive_p0:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, s2
; GCN-NEXT: v_mov_b32_e32 v1, s3
Expand All @@ -340,11 +340,11 @@ define amdgpu_kernel void @set_inactive_p0(ptr addrspace(1) %out, ptr %in) {
define amdgpu_kernel void @set_inactive_p2(ptr addrspace(1) %out, ptr addrspace(2) %in) {
; GCN-LABEL: set_inactive_p2:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dword s3, s[0:1], 0x2c
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-NEXT: s_load_dword s4, s[2:3], 0x2c
; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, s3
; GCN-NEXT: v_mov_b32_e32 v0, s4
; GCN-NEXT: s_not_b64 exec, exec
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_not_b64 exec, exec
Expand All @@ -359,11 +359,11 @@ define amdgpu_kernel void @set_inactive_p2(ptr addrspace(1) %out, ptr addrspace(
define amdgpu_kernel void @set_inactive_p3(ptr addrspace(1) %out, ptr addrspace(3) %in) {
; GCN-LABEL: set_inactive_p3:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dword s3, s[0:1], 0x2c
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-NEXT: s_load_dword s4, s[2:3], 0x2c
; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, s3
; GCN-NEXT: v_mov_b32_e32 v0, s4
; GCN-NEXT: s_not_b64 exec, exec
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_not_b64 exec, exec
Expand All @@ -378,11 +378,11 @@ define amdgpu_kernel void @set_inactive_p3(ptr addrspace(1) %out, ptr addrspace(
define amdgpu_kernel void @set_inactive_p5(ptr addrspace(1) %out, ptr addrspace(5) %in) {
; GCN-LABEL: set_inactive_p5:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dword s3, s[0:1], 0x2c
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-NEXT: s_load_dword s4, s[2:3], 0x2c
; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, s3
; GCN-NEXT: v_mov_b32_e32 v0, s4
; GCN-NEXT: s_not_b64 exec, exec
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_not_b64 exec, exec
Expand All @@ -397,11 +397,11 @@ define amdgpu_kernel void @set_inactive_p5(ptr addrspace(1) %out, ptr addrspace(
define amdgpu_kernel void @set_inactive_p6(ptr addrspace(1) %out, ptr addrspace(6) %in) {
; GCN-LABEL: set_inactive_p6:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dword s3, s[0:1], 0x2c
; GCN-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GCN-NEXT: s_load_dword s4, s[2:3], 0x2c
; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, s3
; GCN-NEXT: v_mov_b32_e32 v0, s4
; GCN-NEXT: s_not_b64 exec, exec
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_not_b64 exec, exec
Expand Down
32 changes: 16 additions & 16 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.trig.preop.ll
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ define double @v_trig_preop_f64_imm(double %a, i32 %b) {
define amdgpu_kernel void @s_trig_preop_f64(double %a, i32 %b) {
; CI-LABEL: s_trig_preop_f64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dword s2, s[4:5], 0x2
; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; CI-NEXT: s_load_dword s2, s[6:7], 0x2
; CI-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: v_mov_b32_e32 v0, s2
; CI-NEXT: v_trig_preop_f64 v[0:1], s[0:1], v0
Expand All @@ -57,8 +57,8 @@ define amdgpu_kernel void @s_trig_preop_f64(double %a, i32 %b) {
;
; VI-LABEL: s_trig_preop_f64:
; VI: ; %bb.0:
; VI-NEXT: s_load_dword s2, s[4:5], 0x8
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; VI-NEXT: s_load_dword s2, s[6:7], 0x8
; VI-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v0, s2
; VI-NEXT: v_trig_preop_f64 v[0:1], s[0:1], v0
Expand All @@ -74,8 +74,8 @@ define amdgpu_kernel void @s_trig_preop_f64(double %a, i32 %b) {
;
; GFX9-LABEL: s_trig_preop_f64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX9-NEXT: s_load_dword s2, s[6:7], 0x8
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s2
; GFX9-NEXT: v_trig_preop_f64 v[0:1], s[0:1], v0
Expand All @@ -86,8 +86,8 @@ define amdgpu_kernel void @s_trig_preop_f64(double %a, i32 %b) {
; GFX10-LABEL: s_trig_preop_f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-NEXT: s_load_dword s2, s[4:5], 0x8
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
; GFX10-NEXT: s_load_dword s2, s[6:7], 0x8
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_trig_preop_f64 v[0:1], s[0:1], s2
; GFX10-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
Expand All @@ -97,10 +97,10 @@ define amdgpu_kernel void @s_trig_preop_f64(double %a, i32 %b) {
; GFX11-LABEL: s_trig_preop_f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x8
; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x0
; GFX11-NEXT: s_load_b32 s2, s[2:3], 0x8
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_trig_preop_f64 v[0:1], s[2:3], s0
; GFX11-NEXT: v_trig_preop_f64 v[0:1], s[0:1], s2
; GFX11-NEXT: flat_store_b64 v[0:1], v[0:1] dlc
; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-NEXT: s_endpgm
Expand All @@ -112,7 +112,7 @@ define amdgpu_kernel void @s_trig_preop_f64(double %a, i32 %b) {
define amdgpu_kernel void @s_trig_preop_f64_imm(double %a, i32 %b) {
; CI-LABEL: s_trig_preop_f64_imm:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; CI-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: v_trig_preop_f64 v[0:1], s[0:1], 7
; CI-NEXT: s_add_u32 s0, s0, 4
Expand All @@ -127,7 +127,7 @@ define amdgpu_kernel void @s_trig_preop_f64_imm(double %a, i32 %b) {
;
; VI-LABEL: s_trig_preop_f64_imm:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; VI-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_trig_preop_f64 v[0:1], s[0:1], 7
; VI-NEXT: s_add_u32 s0, s0, 4
Expand All @@ -142,7 +142,7 @@ define amdgpu_kernel void @s_trig_preop_f64_imm(double %a, i32 %b) {
;
; GFX9-LABEL: s_trig_preop_f64_imm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_trig_preop_f64 v[0:1], s[0:1], 7
; GFX9-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
Expand All @@ -151,7 +151,7 @@ define amdgpu_kernel void @s_trig_preop_f64_imm(double %a, i32 %b) {
;
; GFX10-LABEL: s_trig_preop_f64_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_trig_preop_f64 v[0:1], s[0:1], 7
; GFX10-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
Expand All @@ -160,7 +160,7 @@ define amdgpu_kernel void @s_trig_preop_f64_imm(double %a, i32 %b) {
;
; GFX11-LABEL: s_trig_preop_f64_imm:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_trig_preop_f64 v[0:1], s[0:1], 7
; GFX11-NEXT: flat_store_b64 v[0:1], v[0:1] dlc
Expand Down
126 changes: 63 additions & 63 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll

Large diffs are not rendered by default.

83 changes: 45 additions & 38 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.update.dpp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
define amdgpu_kernel void @dpp_test(ptr addrspace(1) %out, i32 %in1, i32 %in2) {
; GFX8-LABEL: dpp_test:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v2, s2
; GFX8-NEXT: v_mov_b32_e32 v0, s3
Expand All @@ -19,7 +19,7 @@ define amdgpu_kernel void @dpp_test(ptr addrspace(1) %out, i32 %in1, i32 %in2) {
;
; GFX10-LABEL: dpp_test:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, s2
; GFX10-NEXT: v_mov_b32_e32 v1, s3
Expand All @@ -30,7 +30,7 @@ define amdgpu_kernel void @dpp_test(ptr addrspace(1) %out, i32 %in1, i32 %in2) {
;
; GFX11-LABEL: dpp_test:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-NEXT: v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
Expand All @@ -46,7 +46,7 @@ define amdgpu_kernel void @dpp_test(ptr addrspace(1) %out, i32 %in1, i32 %in2) {
define amdgpu_kernel void @update_dppi64_test(ptr addrspace(1) %arg, i64 %in1, i64 %in2) {
; GFX8-LABEL: update_dppi64_test:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 3, v0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s0
Expand All @@ -64,7 +64,7 @@ define amdgpu_kernel void @update_dppi64_test(ptr addrspace(1) %arg, i64 %in1, i
;
; GFX10-LABEL: update_dppi64_test:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 3, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_load_dwordx2 v[0:1], v4, s[0:1]
Expand All @@ -78,10 +78,11 @@ define amdgpu_kernel void @update_dppi64_test(ptr addrspace(1) %arg, i64 %in1, i
;
; GFX11-LABEL: update_dppi64_test:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
; GFX11-NEXT: v_lshlrev_b32_e32 v4, 3, v0
; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX11-NEXT: v_lshlrev_b32_e32 v4, 3, v0
; GFX11-NEXT: global_load_b64 v[0:1], v4, s[0:1]
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_mov_b32_dpp v2, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
Expand All @@ -101,7 +102,7 @@ define amdgpu_kernel void @update_dppi64_test(ptr addrspace(1) %arg, i64 %in1, i
define amdgpu_kernel void @update_dppf64_test(ptr addrspace(1) %arg, double %in1, double %in2) {
; GFX8-LABEL: update_dppf64_test:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 3, v0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s0
Expand All @@ -119,7 +120,7 @@ define amdgpu_kernel void @update_dppf64_test(ptr addrspace(1) %arg, double %in1
;
; GFX10-LABEL: update_dppf64_test:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 3, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_load_dwordx2 v[0:1], v4, s[0:1]
Expand All @@ -133,10 +134,11 @@ define amdgpu_kernel void @update_dppf64_test(ptr addrspace(1) %arg, double %in1
;
; GFX11-LABEL: update_dppf64_test:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
; GFX11-NEXT: v_lshlrev_b32_e32 v4, 3, v0
; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX11-NEXT: v_lshlrev_b32_e32 v4, 3, v0
; GFX11-NEXT: global_load_b64 v[0:1], v4, s[0:1]
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_mov_b32_dpp v2, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
Expand All @@ -156,7 +158,7 @@ define amdgpu_kernel void @update_dppf64_test(ptr addrspace(1) %arg, double %in1
define amdgpu_kernel void @update_dppv2i32_test(ptr addrspace(1) %arg, <2 x i32> %in1, <2 x i32> %in2) {
; GFX8-LABEL: update_dppv2i32_test:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 3, v0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s0
Expand All @@ -174,7 +176,7 @@ define amdgpu_kernel void @update_dppv2i32_test(ptr addrspace(1) %arg, <2 x i32>
;
; GFX10-LABEL: update_dppv2i32_test:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 3, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_load_dwordx2 v[0:1], v4, s[0:1]
Expand All @@ -188,10 +190,11 @@ define amdgpu_kernel void @update_dppv2i32_test(ptr addrspace(1) %arg, <2 x i32>
;
; GFX11-LABEL: update_dppv2i32_test:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
; GFX11-NEXT: v_lshlrev_b32_e32 v4, 3, v0
; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX11-NEXT: v_lshlrev_b32_e32 v4, 3, v0
; GFX11-NEXT: global_load_b64 v[0:1], v4, s[0:1]
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_mov_b32_dpp v2, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
Expand All @@ -211,7 +214,7 @@ define amdgpu_kernel void @update_dppv2i32_test(ptr addrspace(1) %arg, <2 x i32>
define amdgpu_kernel void @update_dppv2f32_test(ptr addrspace(1) %arg, <2 x float> %in1, <2 x float> %in2) {
; GFX8-LABEL: update_dppv2f32_test:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 3, v0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s0
Expand All @@ -229,7 +232,7 @@ define amdgpu_kernel void @update_dppv2f32_test(ptr addrspace(1) %arg, <2 x floa
;
; GFX10-LABEL: update_dppv2f32_test:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 3, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_load_dwordx2 v[0:1], v4, s[0:1]
Expand All @@ -243,10 +246,11 @@ define amdgpu_kernel void @update_dppv2f32_test(ptr addrspace(1) %arg, <2 x floa
;
; GFX11-LABEL: update_dppv2f32_test:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
; GFX11-NEXT: v_lshlrev_b32_e32 v4, 3, v0
; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX11-NEXT: v_lshlrev_b32_e32 v4, 3, v0
; GFX11-NEXT: global_load_b64 v[0:1], v4, s[0:1]
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_mov_b32_dpp v2, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
Expand All @@ -266,7 +270,7 @@ define amdgpu_kernel void @update_dppv2f32_test(ptr addrspace(1) %arg, <2 x floa
define amdgpu_kernel void @update_dpp_p0_test(ptr addrspace(1) %arg, ptr %in1, ptr %in2) {
; GFX8-LABEL: update_dpp_p0_test:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 3, v0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s0
Expand All @@ -284,7 +288,7 @@ define amdgpu_kernel void @update_dpp_p0_test(ptr addrspace(1) %arg, ptr %in1, p
;
; GFX10-LABEL: update_dpp_p0_test:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 3, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_load_dwordx2 v[0:1], v4, s[0:1]
Expand All @@ -298,10 +302,11 @@ define amdgpu_kernel void @update_dpp_p0_test(ptr addrspace(1) %arg, ptr %in1, p
;
; GFX11-LABEL: update_dpp_p0_test:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
; GFX11-NEXT: v_lshlrev_b32_e32 v4, 3, v0
; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
; GFX11-NEXT: v_lshlrev_b32_e32 v4, 3, v0
; GFX11-NEXT: global_load_b64 v[0:1], v4, s[0:1]
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_mov_b32_dpp v2, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
Expand All @@ -321,7 +326,7 @@ define amdgpu_kernel void @update_dpp_p0_test(ptr addrspace(1) %arg, ptr %in1, p
define amdgpu_kernel void @update_dpp_p3_test(ptr addrspace(3) %arg, ptr addrspace(3) %in1, ptr %in2) {
; GFX8-LABEL: update_dpp_p3_test:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX8-NEXT: s_mov_b32 m0, -1
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
Expand All @@ -336,7 +341,7 @@ define amdgpu_kernel void @update_dpp_p3_test(ptr addrspace(3) %arg, ptr addrspa
;
; GFX10-LABEL: update_dpp_p3_test:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_add_nc_u32_e32 v0, s0, v0
Expand All @@ -349,7 +354,8 @@ define amdgpu_kernel void @update_dpp_p3_test(ptr addrspace(3) %arg, ptr addrspa
;
; GFX11-LABEL: update_dpp_p3_test:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_add_nc_u32_e32 v0, s0, v0
Expand All @@ -371,11 +377,11 @@ define amdgpu_kernel void @update_dpp_p5_test(ptr addrspace(5) %arg, ptr addrspa
; GFX8-LABEL: update_dpp_p5_test:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_mov_b32 s88, SCRATCH_RSRC_DWORD0
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GFX8-NEXT: s_mov_b32 s89, SCRATCH_RSRC_DWORD1
; GFX8-NEXT: s_mov_b32 s90, -1
; GFX8-NEXT: s_mov_b32 s91, 0xe80000
; GFX8-NEXT: s_add_u32 s88, s88, s3
; GFX8-NEXT: s_add_u32 s88, s88, s9
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX8-NEXT: s_addc_u32 s89, s89, 0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
Expand All @@ -390,26 +396,27 @@ define amdgpu_kernel void @update_dpp_p5_test(ptr addrspace(5) %arg, ptr addrspa
;
; GFX10-LABEL: update_dpp_p5_test:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX10-NEXT: s_mov_b32 s4, SCRATCH_RSRC_DWORD0
; GFX10-NEXT: s_mov_b32 s5, SCRATCH_RSRC_DWORD1
; GFX10-NEXT: s_mov_b32 s6, -1
; GFX10-NEXT: s_mov_b32 s7, 0x31c16000
; GFX10-NEXT: s_add_u32 s4, s4, s3
; GFX10-NEXT: s_addc_u32 s5, s5, 0
; GFX10-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GFX10-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
; GFX10-NEXT: s_mov_b32 s14, -1
; GFX10-NEXT: s_mov_b32 s15, 0x31c16000
; GFX10-NEXT: s_add_u32 s12, s12, s9
; GFX10-NEXT: s_addc_u32 s13, s13, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_add_nc_u32_e32 v0, s0, v0
; GFX10-NEXT: v_mov_b32_e32 v2, s1
; GFX10-NEXT: buffer_load_dword v1, v0, s[4:7], 0 offen
; GFX10-NEXT: buffer_load_dword v1, v0, s[12:15], 0 offen
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mov_b32_dpp v2, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; GFX10-NEXT: buffer_store_dword v2, v0, s[4:7], 0 offen
; GFX10-NEXT: buffer_store_dword v2, v0, s[12:15], 0 offen
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: update_dpp_p5_test:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_add_nc_u32_e32 v0, s0, v0
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=hawaii -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,UNKNOWN-OS %s
; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=tonga -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,UNKNOWN-OS %s
; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,MESA3D %s
; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,MESA3D %s
; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor %s -o %t.bc
; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=hawaii < %t.bc | FileCheck --check-prefixes=ALL,UNKNOWN-OS %s
; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=tonga < %t.bc | FileCheck --check-prefixes=ALL,UNKNOWN-OS %s
; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=hawaii < %t.bc | FileCheck -check-prefixes=ALL,MESA3D %s
; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga < %t.bc | FileCheck -check-prefixes=ALL,MESA3D %s

declare i32 @llvm.amdgcn.workgroup.id.x() #0
declare i32 @llvm.amdgcn.workgroup.id.y() #0
Expand Down
20 changes: 11 additions & 9 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs | FileCheck --check-prefixes=ALL,HSA,UNPACKED %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs| FileCheck --check-prefixes=ALL,HSA,UNPACKED %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-- -mcpu=hawaii -mattr=+flat-for-global -verify-machineinstrs | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-- -mcpu=tonga -mattr=+flat-for-global -verify-machineinstrs | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mattr=+flat-for-global -mcpu=hawaii -verify-machineinstrs | FileCheck -check-prefixes=ALL,MESA3D,UNPACKED %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=ALL,MESA3D,UNPACKED %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -verify-machineinstrs | FileCheck -check-prefixes=ALL,PACKED-TID %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 | FileCheck -check-prefixes=ALL,PACKED-TID %s
; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -global-isel -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx11-generic -verify-machineinstrs -amdgpu-enable-vopd=0 | FileCheck -check-prefixes=ALL,PACKED-TID %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor -o %t.v4.ll
; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-attributor -o %t.v6.ll
; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs < %t.v4.ll | FileCheck --check-prefixes=ALL,HSA,UNPACKED %s
; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs < %t.v4.ll | FileCheck --check-prefixes=ALL,HSA,UNPACKED %s
; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=hawaii -mattr=+flat-for-global -verify-machineinstrs < %t.v4.ll | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s
; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=tonga -mattr=+flat-for-global -verify-machineinstrs < %t.v4.ll | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s
; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mattr=+flat-for-global -mcpu=hawaii -verify-machineinstrs < %t.v4.ll | FileCheck -check-prefixes=ALL,MESA3D,UNPACKED %s
; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs < %t.v4.ll | FileCheck -check-prefixes=ALL,MESA3D,UNPACKED %s
; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -verify-machineinstrs < %t.v4.ll | FileCheck -check-prefixes=ALL,PACKED-TID %s
; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %t.v4.ll | FileCheck -check-prefixes=ALL,PACKED-TID %s
; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx11-generic -verify-machineinstrs -amdgpu-enable-vopd=0 < %t.v6.ll | FileCheck -check-prefixes=ALL,PACKED-TID %s

declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.workitem.id.y() #0
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
define amdgpu_kernel void @localize_constants(i1 %cond) {
; GFX9-LABEL: localize_constants:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_load_dword s1, s[4:5], 0x0
; GFX9-NEXT: s_load_dword s1, s[6:7], 0x0
; GFX9-NEXT: s_mov_b32 s0, 1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_xor_b32 s1, s1, 1
Expand Down Expand Up @@ -95,7 +95,7 @@ bb2:
define amdgpu_kernel void @localize_globals(i1 %cond) {
; GFX9-LABEL: localize_globals:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_load_dword s1, s[4:5], 0x0
; GFX9-NEXT: s_load_dword s1, s[6:7], 0x0
; GFX9-NEXT: s_mov_b32 s0, 1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_xor_b32 s1, s1, 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
; Note: we use MIR test checks + stop after legalizer to prevent
; tests from being optimized out.

define amdgpu_kernel void @system_one_as_acquire() {
define amdgpu_kernel void @system_one_as_acquire() #0 {
; GFX6-LABEL: name: system_one_as_acquire
; GFX6: bb.0.entry:
; GFX6-NEXT: S_WAITCNT_soft 3952
Expand Down Expand Up @@ -59,7 +59,7 @@ entry:
ret void
}

define amdgpu_kernel void @system_one_as_release() {
define amdgpu_kernel void @system_one_as_release() #0 {
; GFX6-LABEL: name: system_one_as_release
; GFX6: bb.0.entry:
; GFX6-NEXT: S_WAITCNT_soft 3952
Expand Down Expand Up @@ -98,7 +98,7 @@ entry:
ret void
}

define amdgpu_kernel void @system_one_as_acq_rel() {
define amdgpu_kernel void @system_one_as_acq_rel() #0 {
; GFX6-LABEL: name: system_one_as_acq_rel
; GFX6: bb.0.entry:
; GFX6-NEXT: S_WAITCNT_soft 3952
Expand Down Expand Up @@ -147,7 +147,7 @@ entry:
ret void
}

define amdgpu_kernel void @system_one_as_seq_cst() {
define amdgpu_kernel void @system_one_as_seq_cst() #0 {
; GFX6-LABEL: name: system_one_as_seq_cst
; GFX6: bb.0.entry:
; GFX6-NEXT: S_WAITCNT_soft 3952
Expand Down Expand Up @@ -196,7 +196,7 @@ entry:
ret void
}

define amdgpu_kernel void @singlethread_one_as_acquire() {
define amdgpu_kernel void @singlethread_one_as_acquire() #0 {
; GFX6-LABEL: name: singlethread_one_as_acquire
; GFX6: bb.0.entry:
; GFX6-NEXT: S_ENDPGM 0
Expand Down Expand Up @@ -225,7 +225,7 @@ entry:
ret void
}

define amdgpu_kernel void @singlethread_one_as_release() {
define amdgpu_kernel void @singlethread_one_as_release() #0 {
; GFX6-LABEL: name: singlethread_one_as_release
; GFX6: bb.0.entry:
; GFX6-NEXT: S_ENDPGM 0
Expand Down Expand Up @@ -254,7 +254,7 @@ entry:
ret void
}

define amdgpu_kernel void @singlethread_one_as_acq_rel() {
define amdgpu_kernel void @singlethread_one_as_acq_rel() #0 {
; GFX6-LABEL: name: singlethread_one_as_acq_rel
; GFX6: bb.0.entry:
; GFX6-NEXT: S_ENDPGM 0
Expand Down Expand Up @@ -283,7 +283,7 @@ entry:
ret void
}

define amdgpu_kernel void @singlethread_one_as_seq_cst() {
define amdgpu_kernel void @singlethread_one_as_seq_cst() #0 {
; GFX6-LABEL: name: singlethread_one_as_seq_cst
; GFX6: bb.0.entry:
; GFX6-NEXT: S_ENDPGM 0
Expand Down Expand Up @@ -312,7 +312,7 @@ entry:
ret void
}

define amdgpu_kernel void @agent_one_as_acquire() {
define amdgpu_kernel void @agent_one_as_acquire() #0 {
; GFX6-LABEL: name: agent_one_as_acquire
; GFX6: bb.0.entry:
; GFX6-NEXT: S_WAITCNT_soft 3952
Expand Down Expand Up @@ -361,7 +361,7 @@ entry:
ret void
}

define amdgpu_kernel void @agent_one_as_release() {
define amdgpu_kernel void @agent_one_as_release() #0 {
; GFX6-LABEL: name: agent_one_as_release
; GFX6: bb.0.entry:
; GFX6-NEXT: S_WAITCNT_soft 3952
Expand Down Expand Up @@ -400,7 +400,7 @@ entry:
ret void
}

define amdgpu_kernel void @agent_one_as_acq_rel() {
define amdgpu_kernel void @agent_one_as_acq_rel() #0 {
; GFX6-LABEL: name: agent_one_as_acq_rel
; GFX6: bb.0.entry:
; GFX6-NEXT: S_WAITCNT_soft 3952
Expand Down Expand Up @@ -449,7 +449,7 @@ entry:
ret void
}

define amdgpu_kernel void @agent_one_as_seq_cst() {
define amdgpu_kernel void @agent_one_as_seq_cst() #0 {
; GFX6-LABEL: name: agent_one_as_seq_cst
; GFX6: bb.0.entry:
; GFX6-NEXT: S_WAITCNT_soft 3952
Expand Down Expand Up @@ -498,7 +498,7 @@ entry:
ret void
}

define amdgpu_kernel void @workgroup_one_as_acquire() {
define amdgpu_kernel void @workgroup_one_as_acquire() #0 {
; GFX6-LABEL: name: workgroup_one_as_acquire
; GFX6: bb.0.entry:
; GFX6-NEXT: S_ENDPGM 0
Expand Down Expand Up @@ -533,7 +533,7 @@ entry:
ret void
}

define amdgpu_kernel void @workgroup_one_as_release() {
define amdgpu_kernel void @workgroup_one_as_release() #0 {
; GFX6-LABEL: name: workgroup_one_as_release
; GFX6: bb.0.entry:
; GFX6-NEXT: S_ENDPGM 0
Expand Down Expand Up @@ -566,7 +566,7 @@ entry:
ret void
}

define amdgpu_kernel void @workgroup_one_as_acq_rel() {
define amdgpu_kernel void @workgroup_one_as_acq_rel() #0 {
; GFX6-LABEL: name: workgroup_one_as_acq_rel
; GFX6: bb.0.entry:
; GFX6-NEXT: S_ENDPGM 0
Expand Down Expand Up @@ -601,7 +601,7 @@ entry:
ret void
}

define amdgpu_kernel void @workgroup_one_as_seq_cst() {
define amdgpu_kernel void @workgroup_one_as_seq_cst() #0 {
; GFX6-LABEL: name: workgroup_one_as_seq_cst
; GFX6: bb.0.entry:
; GFX6-NEXT: S_ENDPGM 0
Expand Down Expand Up @@ -636,7 +636,7 @@ entry:
ret void
}

define amdgpu_kernel void @wavefront_one_as_acquire() {
define amdgpu_kernel void @wavefront_one_as_acquire() #0 {
; GFX6-LABEL: name: wavefront_one_as_acquire
; GFX6: bb.0.entry:
; GFX6-NEXT: S_ENDPGM 0
Expand Down Expand Up @@ -665,7 +665,7 @@ entry:
ret void
}

define amdgpu_kernel void @wavefront_one_as_release() {
define amdgpu_kernel void @wavefront_one_as_release() #0 {
; GFX6-LABEL: name: wavefront_one_as_release
; GFX6: bb.0.entry:
; GFX6-NEXT: S_ENDPGM 0
Expand Down Expand Up @@ -694,7 +694,7 @@ entry:
ret void
}

define amdgpu_kernel void @wavefront_one_as_acq_rel() {
define amdgpu_kernel void @wavefront_one_as_acq_rel() #0 {
; GFX6-LABEL: name: wavefront_one_as_acq_rel
; GFX6: bb.0.entry:
; GFX6-NEXT: S_ENDPGM 0
Expand Down Expand Up @@ -723,7 +723,7 @@ entry:
ret void
}

define amdgpu_kernel void @wavefront_one_as_seq_cst() {
define amdgpu_kernel void @wavefront_one_as_seq_cst() #0 {
; GFX6-LABEL: name: wavefront_one_as_seq_cst
; GFX6: bb.0.entry:
; GFX6-NEXT: S_ENDPGM 0
Expand Down Expand Up @@ -752,7 +752,7 @@ entry:
ret void
}

define amdgpu_kernel void @system_acquire() {
define amdgpu_kernel void @system_acquire() #0 {
; GFX6-LABEL: name: system_acquire
; GFX6: bb.0.entry:
; GFX6-NEXT: S_WAITCNT_soft 112
Expand Down Expand Up @@ -801,7 +801,7 @@ entry:
ret void
}

define amdgpu_kernel void @system_release() {
define amdgpu_kernel void @system_release() #0 {
; GFX6-LABEL: name: system_release
; GFX6: bb.0.entry:
; GFX6-NEXT: S_WAITCNT_soft 112
Expand Down Expand Up @@ -840,7 +840,7 @@ entry:
ret void
}

define amdgpu_kernel void @system_acq_rel() {
define amdgpu_kernel void @system_acq_rel() #0 {
; GFX6-LABEL: name: system_acq_rel
; GFX6: bb.0.entry:
; GFX6-NEXT: S_WAITCNT_soft 112
Expand Down Expand Up @@ -889,7 +889,7 @@ entry:
ret void
}

define amdgpu_kernel void @system_seq_cst() {
define amdgpu_kernel void @system_seq_cst() #0 {
; GFX6-LABEL: name: system_seq_cst
; GFX6: bb.0.entry:
; GFX6-NEXT: S_WAITCNT_soft 112
Expand Down Expand Up @@ -938,7 +938,7 @@ entry:
ret void
}

define amdgpu_kernel void @singlethread_acquire() {
define amdgpu_kernel void @singlethread_acquire() #0 {
; GFX6-LABEL: name: singlethread_acquire
; GFX6: bb.0.entry:
; GFX6-NEXT: S_ENDPGM 0
Expand Down Expand Up @@ -967,7 +967,7 @@ entry:
ret void
}

define amdgpu_kernel void @singlethread_release() {
define amdgpu_kernel void @singlethread_release() #0 {
; GFX6-LABEL: name: singlethread_release
; GFX6: bb.0.entry:
; GFX6-NEXT: S_ENDPGM 0
Expand Down Expand Up @@ -996,7 +996,7 @@ entry:
ret void
}

define amdgpu_kernel void @singlethread_acq_rel() {
define amdgpu_kernel void @singlethread_acq_rel() #0 {
; GFX6-LABEL: name: singlethread_acq_rel
; GFX6: bb.0.entry:
; GFX6-NEXT: S_ENDPGM 0
Expand Down Expand Up @@ -1025,7 +1025,7 @@ entry:
ret void
}

define amdgpu_kernel void @singlethread_seq_cst() {
define amdgpu_kernel void @singlethread_seq_cst() #0 {
; GFX6-LABEL: name: singlethread_seq_cst
; GFX6: bb.0.entry:
; GFX6-NEXT: S_ENDPGM 0
Expand Down Expand Up @@ -1054,7 +1054,7 @@ entry:
ret void
}

define amdgpu_kernel void @agent_acquire() {
define amdgpu_kernel void @agent_acquire() #0 {
; GFX6-LABEL: name: agent_acquire
; GFX6: bb.0.entry:
; GFX6-NEXT: S_WAITCNT_soft 112
Expand Down Expand Up @@ -1103,7 +1103,7 @@ entry:
ret void
}

define amdgpu_kernel void @agent_release() {
define amdgpu_kernel void @agent_release() #0 {
; GFX6-LABEL: name: agent_release
; GFX6: bb.0.entry:
; GFX6-NEXT: S_WAITCNT_soft 112
Expand Down Expand Up @@ -1142,7 +1142,7 @@ entry:
ret void
}

define amdgpu_kernel void @agent_acq_rel() {
define amdgpu_kernel void @agent_acq_rel() #0 {
; GFX6-LABEL: name: agent_acq_rel
; GFX6: bb.0.entry:
; GFX6-NEXT: S_WAITCNT_soft 112
Expand Down Expand Up @@ -1191,7 +1191,7 @@ entry:
ret void
}

define amdgpu_kernel void @agent_seq_cst() {
define amdgpu_kernel void @agent_seq_cst() #0 {
; GFX6-LABEL: name: agent_seq_cst
; GFX6: bb.0.entry:
; GFX6-NEXT: S_WAITCNT_soft 112
Expand Down Expand Up @@ -1240,7 +1240,7 @@ entry:
ret void
}

define amdgpu_kernel void @workgroup_acquire() {
define amdgpu_kernel void @workgroup_acquire() #0 {
; GFX6-LABEL: name: workgroup_acquire
; GFX6: bb.0.entry:
; GFX6-NEXT: S_WAITCNT_soft 127
Expand Down Expand Up @@ -1279,7 +1279,7 @@ entry:
ret void
}

define amdgpu_kernel void @workgroup_release() {
define amdgpu_kernel void @workgroup_release() #0 {
; GFX6-LABEL: name: workgroup_release
; GFX6: bb.0.entry:
; GFX6-NEXT: S_WAITCNT_soft 127
Expand Down Expand Up @@ -1316,7 +1316,7 @@ entry:
ret void
}

define amdgpu_kernel void @workgroup_acq_rel() {
define amdgpu_kernel void @workgroup_acq_rel() #0 {
; GFX6-LABEL: name: workgroup_acq_rel
; GFX6: bb.0.entry:
; GFX6-NEXT: S_WAITCNT_soft 127
Expand Down Expand Up @@ -1355,7 +1355,7 @@ entry:
ret void
}

define amdgpu_kernel void @workgroup_seq_cst() {
define amdgpu_kernel void @workgroup_seq_cst() #0 {
; GFX6-LABEL: name: workgroup_seq_cst
; GFX6: bb.0.entry:
; GFX6-NEXT: S_WAITCNT_soft 127
Expand Down Expand Up @@ -1394,7 +1394,7 @@ entry:
ret void
}

define amdgpu_kernel void @wavefront_acquire() {
define amdgpu_kernel void @wavefront_acquire() #0 {
; GFX6-LABEL: name: wavefront_acquire
; GFX6: bb.0.entry:
; GFX6-NEXT: S_ENDPGM 0
Expand Down Expand Up @@ -1423,7 +1423,7 @@ entry:
ret void
}

define amdgpu_kernel void @wavefront_release() {
define amdgpu_kernel void @wavefront_release() #0 {
; GFX6-LABEL: name: wavefront_release
; GFX6: bb.0.entry:
; GFX6-NEXT: S_ENDPGM 0
Expand Down Expand Up @@ -1452,7 +1452,7 @@ entry:
ret void
}

define amdgpu_kernel void @wavefront_acq_rel() {
define amdgpu_kernel void @wavefront_acq_rel() #0 {
; GFX6-LABEL: name: wavefront_acq_rel
; GFX6: bb.0.entry:
; GFX6-NEXT: S_ENDPGM 0
Expand Down Expand Up @@ -1481,7 +1481,7 @@ entry:
ret void
}

define amdgpu_kernel void @wavefront_seq_cst() {
define amdgpu_kernel void @wavefront_seq_cst() #0 {
; GFX6-LABEL: name: wavefront_seq_cst
; GFX6: bb.0.entry:
; GFX6-NEXT: S_ENDPGM 0
Expand Down Expand Up @@ -1509,3 +1509,5 @@ entry:
fence syncscope("wavefront") seq_cst
ret void
}

attributes #0 = { "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
114 changes: 66 additions & 48 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/mul-known-bits.i64.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ declare i32 @llvm.amdgcn.workitem.id.x()
define amdgpu_kernel void @v_mul_i64_no_zext(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) nounwind {
; GFX10-LABEL: v_mul_i64_no_zext:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x2c
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x2c
; GFX10-NEXT: v_lshlrev_b32_e32 v7, 3, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_clause 0x1
Expand All @@ -23,7 +23,9 @@ define amdgpu_kernel void @v_mul_i64_no_zext(ptr addrspace(1) %out, ptr addrspac
;
; GFX11-LABEL: v_mul_i64_no_zext:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x2c
; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x2c
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_lshlrev_b32_e32 v9, 3, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_clause 0x1
Expand Down Expand Up @@ -56,13 +58,13 @@ define amdgpu_kernel void @v_mul_i64_zext_src1(ptr addrspace(1) %out, ptr addrsp
; GFX10-LABEL: v_mul_i64_zext_src1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 3, v0
; GFX10-NEXT: v_lshlrev_b32_e32 v3, 2, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_load_dwordx2 v[0:1], v2, s[6:7]
; GFX10-NEXT: global_load_dword v4, v3, s[2:3]
; GFX10-NEXT: global_load_dword v4, v3, s[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mad_u64_u32 v[2:3], s0, v0, v4, 0
; GFX10-NEXT: v_mov_b32_e32 v0, v3
Expand All @@ -75,8 +77,10 @@ define amdgpu_kernel void @v_mul_i64_zext_src1(ptr addrspace(1) %out, ptr addrsp
; GFX11-LABEL: v_mul_i64_zext_src1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x34
; GFX11-NEXT: s_load_b128 s[4:7], s[2:3], 0x24
; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x34
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 2, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
Expand Down Expand Up @@ -108,13 +112,13 @@ define amdgpu_kernel void @v_mul_i64_zext_src0(ptr addrspace(1) %out, ptr addrsp
; GFX10-LABEL: v_mul_i64_zext_src0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 2, v0
; GFX10-NEXT: v_lshlrev_b32_e32 v3, 3, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_load_dword v4, v2, s[6:7]
; GFX10-NEXT: global_load_dwordx2 v[0:1], v3, s[2:3]
; GFX10-NEXT: global_load_dwordx2 v[0:1], v3, s[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mad_u64_u32 v[2:3], s0, v4, v0, 0
; GFX10-NEXT: v_mov_b32_e32 v0, v3
Expand All @@ -127,8 +131,10 @@ define amdgpu_kernel void @v_mul_i64_zext_src0(ptr addrspace(1) %out, ptr addrsp
; GFX11-LABEL: v_mul_i64_zext_src0:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x34
; GFX11-NEXT: s_load_b128 s[4:7], s[2:3], 0x24
; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x34
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
Expand Down Expand Up @@ -160,13 +166,13 @@ define amdgpu_kernel void @v_mul_i64_zext_src0_src1(ptr addrspace(1) %out, ptr a
; GFX10-LABEL: v_mul_i64_zext_src0_src1:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: global_load_dword v1, v0, s[6:7]
; GFX10-NEXT: global_load_dword v2, v0, s[2:3]
; GFX10-NEXT: global_load_dword v2, v0, s[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mad_u64_u32 v[0:1], s0, v1, v2, 0
; GFX10-NEXT: v_mov_b32_e32 v2, 0
Expand All @@ -176,10 +182,12 @@ define amdgpu_kernel void @v_mul_i64_zext_src0_src1(ptr addrspace(1) %out, ptr a
; GFX11-LABEL: v_mul_i64_zext_src0_src1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x34
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX11-NEXT: s_load_b128 s[4:7], s[2:3], 0x24
; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x34
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: global_load_b32 v1, v0, s[6:7]
Expand Down Expand Up @@ -207,13 +215,13 @@ define amdgpu_kernel void @v_mul_i64_masked_src0_hi(ptr addrspace(1) %out, ptr a
; GFX10-LABEL: v_mul_i64_masked_src0_hi:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
; GFX10-NEXT: v_lshlrev_b32_e32 v2, 3, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: global_load_dword v4, v2, s[6:7]
; GFX10-NEXT: global_load_dwordx2 v[0:1], v2, s[2:3]
; GFX10-NEXT: global_load_dwordx2 v[0:1], v2, s[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mad_u64_u32 v[2:3], s0, v4, v0, 0
; GFX10-NEXT: v_mov_b32_e32 v0, v3
Expand All @@ -226,8 +234,10 @@ define amdgpu_kernel void @v_mul_i64_masked_src0_hi(ptr addrspace(1) %out, ptr a
; GFX11-LABEL: v_mul_i64_masked_src0_hi:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x34
; GFX11-NEXT: s_load_b128 s[4:7], s[2:3], 0x24
; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x34
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_clause 0x1
Expand Down Expand Up @@ -259,13 +269,13 @@ define amdgpu_kernel void @v_mul_i64_masked_src0_lo(ptr addrspace(1) %out, ptr a
; GFX10-LABEL: v_mul_i64_masked_src0_lo:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 3, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: global_load_dwordx2 v[0:1], v4, s[6:7]
; GFX10-NEXT: global_load_dwordx2 v[2:3], v4, s[2:3]
; GFX10-NEXT: global_load_dwordx2 v[2:3], v4, s[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(1)
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_waitcnt vmcnt(0)
Expand All @@ -276,8 +286,10 @@ define amdgpu_kernel void @v_mul_i64_masked_src0_lo(ptr addrspace(1) %out, ptr a
; GFX11-LABEL: v_mul_i64_masked_src0_lo:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x34
; GFX11-NEXT: s_load_b128 s[4:7], s[2:3], 0x24
; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x34
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 3, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_clause 0x1
Expand Down Expand Up @@ -307,16 +319,16 @@ define amdgpu_kernel void @v_mul_i64_masked_src1_lo(ptr addrspace(1) %out, ptr a
; GFX10-LABEL: v_mul_i64_masked_src1_lo:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
; GFX10-NEXT: v_lshlrev_b32_e32 v3, 3, v0
; GFX10-NEXT: ; kill: killed $vgpr3
; GFX10-NEXT: ; kill: killed $sgpr6_sgpr7
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: global_load_dwordx2 v[0:1], v3, s[6:7]
; GFX10-NEXT: global_load_dwordx2 v[1:2], v3, s[2:3]
; GFX10-NEXT: ; kill: killed $sgpr2_sgpr3
; GFX10-NEXT: global_load_dwordx2 v[1:2], v3, s[0:1]
; GFX10-NEXT: ; kill: killed $sgpr0_sgpr1
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mul_lo_u32 v1, v0, v2
; GFX10-NEXT: v_mov_b32_e32 v0, 0
Expand All @@ -326,8 +338,10 @@ define amdgpu_kernel void @v_mul_i64_masked_src1_lo(ptr addrspace(1) %out, ptr a
; GFX11-LABEL: v_mul_i64_masked_src1_lo:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x34
; GFX11-NEXT: s_load_b128 s[4:7], s[2:3], 0x24
; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x34
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 3, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_clause 0x1
Expand Down Expand Up @@ -355,7 +369,7 @@ define amdgpu_kernel void @v_mul_i64_masked_src1_lo(ptr addrspace(1) %out, ptr a
define amdgpu_kernel void @v_mul_i64_masked_src0(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) {
; GFX10-LABEL: v_mul_i64_masked_src0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: v_mov_b32_e32 v2, 0
Expand All @@ -365,7 +379,7 @@ define amdgpu_kernel void @v_mul_i64_masked_src0(ptr addrspace(1) %out, ptr addr
;
; GFX11-LABEL: v_mul_i64_masked_src0:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
Expand All @@ -389,13 +403,13 @@ define amdgpu_kernel void @v_mul_i64_partially_masked_src0(ptr addrspace(1) %out
; GFX10-LABEL: v_mul_i64_partially_masked_src0:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 3, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: global_load_dwordx2 v[0:1], v4, s[6:7]
; GFX10-NEXT: global_load_dwordx2 v[2:3], v4, s[2:3]
; GFX10-NEXT: global_load_dwordx2 v[2:3], v4, s[0:1]
; GFX10-NEXT: s_waitcnt vmcnt(1)
; GFX10-NEXT: v_and_b32_e32 v6, 0xfff00000, v0
; GFX10-NEXT: s_waitcnt vmcnt(0)
Expand All @@ -412,8 +426,10 @@ define amdgpu_kernel void @v_mul_i64_partially_masked_src0(ptr addrspace(1) %out
; GFX11-LABEL: v_mul_i64_partially_masked_src0:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x34
; GFX11-NEXT: s_load_b128 s[4:7], s[2:3], 0x24
; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x34
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 3, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_clause 0x1
Expand Down Expand Up @@ -450,7 +466,7 @@ define amdgpu_kernel void @v_mul_i64_partially_masked_src0(ptr addrspace(1) %out
define amdgpu_kernel void @v_mul64_masked_before_branch(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) {
; GFX10-LABEL: v_mul64_masked_before_branch:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: v_mov_b32_e32 v2, 0
Expand All @@ -460,7 +476,7 @@ define amdgpu_kernel void @v_mul64_masked_before_branch(ptr addrspace(1) %out, p
;
; GFX11-LABEL: v_mul64_masked_before_branch:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
; GFX11-NEXT: v_mov_b32_e32 v0, 0
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
Expand Down Expand Up @@ -498,13 +514,13 @@ define amdgpu_kernel void @v_mul64_masked_before_and_in_branch(ptr addrspace(1)
; GFX10-LABEL: v_mul64_masked_before_and_in_branch:
; GFX10: ; %bb.0: ; %entry
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x34
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: global_load_dwordx2 v[2:3], v0, s[6:7]
; GFX10-NEXT: global_load_dwordx2 v[4:5], v0, s[2:3]
; GFX10-NEXT: global_load_dwordx2 v[4:5], v0, s[0:1]
; GFX10-NEXT: ; implicit-def: $vgpr0_vgpr1
; GFX10-NEXT: s_waitcnt vmcnt(1)
; GFX10-NEXT: v_cmp_ge_u64_e32 vcc_lo, 0, v[2:3]
Expand Down Expand Up @@ -533,8 +549,10 @@ define amdgpu_kernel void @v_mul64_masked_before_and_in_branch(ptr addrspace(1)
; GFX11-LABEL: v_mul64_masked_before_and_in_branch:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: s_load_b128 s[4:7], s[0:1], 0x24
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x34
; GFX11-NEXT: s_load_b128 s[4:7], s[2:3], 0x24
; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x34
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_clause 0x1
Expand Down
24 changes: 12 additions & 12 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2542,7 +2542,7 @@ define amdgpu_ps void @s_mul_u64_zext_with_vregs(ptr addrspace(1) %out, ptr addr
define amdgpu_kernel void @s_mul_u64_zext_with_sregs(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GFX7-LABEL: s_mul_u64_zext_with_sregs:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
; GFX7-NEXT: v_mov_b32_e32 v0, 0x50
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_load_dword s3, s[2:3], 0x0
Expand All @@ -2559,7 +2559,7 @@ define amdgpu_kernel void @s_mul_u64_zext_with_sregs(ptr addrspace(1) %out, ptr
;
; GFX8-LABEL: s_mul_u64_zext_with_sregs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX8-NEXT: v_mov_b32_e32 v0, 0x50
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_load_dword s2, s[2:3], 0x0
Expand All @@ -2576,7 +2576,7 @@ define amdgpu_kernel void @s_mul_u64_zext_with_sregs(ptr addrspace(1) %out, ptr
;
; GFX9-LABEL: s_mul_u64_zext_with_sregs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_load_dword s3, s[2:3], 0x0
Expand All @@ -2590,7 +2590,7 @@ define amdgpu_kernel void @s_mul_u64_zext_with_sregs(ptr addrspace(1) %out, ptr
;
; GFX10-LABEL: s_mul_u64_zext_with_sregs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX10-NEXT: v_mov_b32_e32 v2, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_load_dword s3, s[2:3], 0x0
Expand All @@ -2604,7 +2604,7 @@ define amdgpu_kernel void @s_mul_u64_zext_with_sregs(ptr addrspace(1) %out, ptr
;
; GFX11-LABEL: s_mul_u64_zext_with_sregs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_load_b32 s3, s[2:3], 0x0
Expand All @@ -2619,7 +2619,7 @@ define amdgpu_kernel void @s_mul_u64_zext_with_sregs(ptr addrspace(1) %out, ptr
;
; GFX12-LABEL: s_mul_u64_zext_with_sregs:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
; GFX12-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
; GFX12-NEXT: v_mov_b32_e32 v2, 0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_b32 s2, s[2:3], 0x0
Expand Down Expand Up @@ -2718,7 +2718,7 @@ define amdgpu_ps void @s_mul_u64_sext_with_vregs(ptr addrspace(1) %out, ptr addr
define amdgpu_kernel void @s_mul_u64_sext_with_sregs(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GFX7-LABEL: s_mul_u64_sext_with_sregs:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
; GFX7-NEXT: v_mov_b32_e32 v0, 0x50
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_load_dword s3, s[2:3], 0x0
Expand All @@ -2738,7 +2738,7 @@ define amdgpu_kernel void @s_mul_u64_sext_with_sregs(ptr addrspace(1) %out, ptr
;
; GFX8-LABEL: s_mul_u64_sext_with_sregs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX8-NEXT: v_mov_b32_e32 v0, 0x50
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_load_dword s2, s[2:3], 0x0
Expand All @@ -2758,7 +2758,7 @@ define amdgpu_kernel void @s_mul_u64_sext_with_sregs(ptr addrspace(1) %out, ptr
;
; GFX9-LABEL: s_mul_u64_sext_with_sregs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_load_dword s3, s[2:3], 0x0
Expand All @@ -2775,7 +2775,7 @@ define amdgpu_kernel void @s_mul_u64_sext_with_sregs(ptr addrspace(1) %out, ptr
;
; GFX10-LABEL: s_mul_u64_sext_with_sregs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX10-NEXT: v_mov_b32_e32 v2, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_load_dword s2, s[2:3], 0x0
Expand All @@ -2792,7 +2792,7 @@ define amdgpu_kernel void @s_mul_u64_sext_with_sregs(ptr addrspace(1) %out, ptr
;
; GFX11-LABEL: s_mul_u64_sext_with_sregs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_load_b32 s2, s[2:3], 0x0
Expand All @@ -2810,7 +2810,7 @@ define amdgpu_kernel void @s_mul_u64_sext_with_sregs(ptr addrspace(1) %out, ptr
;
; GFX12-LABEL: s_mul_u64_sext_with_sregs:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
; GFX12-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
; GFX12-NEXT: v_mov_b32_e32 v2, 0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_b32 s2, s[2:3], 0x0
Expand Down
46 changes: 23 additions & 23 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,33 +13,33 @@
define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reached_align4(ptr addrspace(1) %out, i32 %arg.cond0, i32 %arg.cond1, i32 %in) {
; GCN-LABEL: kernel_non_entry_block_static_alloca_uniformly_reached_align4:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_dword s6, s[4:5], 0x8
; GCN-NEXT: s_add_u32 s0, s0, s9
; GCN-NEXT: s_load_dword s4, s[6:7], 0x8
; GCN-NEXT: s_add_u32 s0, s0, s15
; GCN-NEXT: s_addc_u32 s1, s1, 0
; GCN-NEXT: s_mov_b32 s33, 0
; GCN-NEXT: s_movk_i32 s32, 0x400
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_cmp_lg_u32 s6, 0
; GCN-NEXT: s_cmp_lg_u32 s4, 0
; GCN-NEXT: s_cbranch_scc1 .LBB0_3
; GCN-NEXT: ; %bb.1: ; %bb.0
; GCN-NEXT: s_load_dword s6, s[4:5], 0xc
; GCN-NEXT: s_load_dword s4, s[6:7], 0xc
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_cmp_lg_u32 s6, 0
; GCN-NEXT: s_cmp_lg_u32 s4, 0
; GCN-NEXT: s_cbranch_scc1 .LBB0_3
; GCN-NEXT: ; %bb.2: ; %bb.1
; GCN-NEXT: s_load_dword s7, s[4:5], 0x10
; GCN-NEXT: s_add_u32 s6, s32, 0x1000
; GCN-NEXT: s_load_dword s5, s[6:7], 0x10
; GCN-NEXT: s_add_u32 s4, s32, 0x1000
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: v_mov_b32_e32 v2, s6
; GCN-NEXT: v_mov_b32_e32 v2, s4
; GCN-NEXT: v_mov_b32_e32 v3, 1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_lshl_b32 s7, s7, 2
; GCN-NEXT: s_add_u32 s6, s6, s7
; GCN-NEXT: s_lshl_b32 s5, s5, 2
; GCN-NEXT: s_add_u32 s4, s4, s5
; GCN-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen
; GCN-NEXT: buffer_store_dword v3, v2, s[0:3], 0 offen offset:4
; GCN-NEXT: v_mov_b32_e32 v2, s6
; GCN-NEXT: v_mov_b32_e32 v2, s4
; GCN-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; GCN-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_add_u32_e32 v0, v2, v0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
Expand Down Expand Up @@ -84,29 +84,29 @@ bb.2:
define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reached_align64(ptr addrspace(1) %out, i32 %arg.cond, i32 %in) {
; GCN-LABEL: kernel_non_entry_block_static_alloca_uniformly_reached_align64:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_dword s6, s[4:5], 0x8
; GCN-NEXT: s_add_u32 s0, s0, s9
; GCN-NEXT: s_load_dword s4, s[6:7], 0x8
; GCN-NEXT: s_add_u32 s0, s0, s15
; GCN-NEXT: s_addc_u32 s1, s1, 0
; GCN-NEXT: s_mov_b32 s33, 0
; GCN-NEXT: s_movk_i32 s32, 0x1000
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_cmp_lg_u32 s6, 0
; GCN-NEXT: s_cmp_lg_u32 s4, 0
; GCN-NEXT: s_cbranch_scc1 .LBB1_2
; GCN-NEXT: ; %bb.1: ; %bb.0
; GCN-NEXT: s_load_dword s6, s[4:5], 0xc
; GCN-NEXT: s_add_u32 s7, s32, 0x1000
; GCN-NEXT: s_and_b32 s7, s7, 0xfffff000
; GCN-NEXT: s_load_dword s4, s[6:7], 0xc
; GCN-NEXT: s_add_u32 s5, s32, 0x1000
; GCN-NEXT: s_and_b32 s5, s5, 0xfffff000
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: v_mov_b32_e32 v2, s7
; GCN-NEXT: v_mov_b32_e32 v2, s5
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_lshl_b32 s6, s6, 2
; GCN-NEXT: s_lshl_b32 s4, s4, 2
; GCN-NEXT: v_mov_b32_e32 v3, 1
; GCN-NEXT: s_add_u32 s6, s7, s6
; GCN-NEXT: s_add_u32 s4, s5, s4
; GCN-NEXT: buffer_store_dword v1, v2, s[0:3], 0 offen
; GCN-NEXT: buffer_store_dword v3, v2, s[0:3], 0 offen offset:4
; GCN-NEXT: v_mov_b32_e32 v2, s6
; GCN-NEXT: v_mov_b32_e32 v2, s4
; GCN-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; GCN-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_add_u32_e32 v0, v2, v0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
Expand Down
738 changes: 369 additions & 369 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll

Large diffs are not rendered by default.

35 changes: 18 additions & 17 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ define i64 @v_shl_i64_sext_i32_overflow(i32 %x) {
define amdgpu_kernel void @mulu24_shl64(ptr addrspace(1) nocapture %arg) {
; GFX7-LABEL: mulu24_shl64:
; GFX7: ; %bb.0: ; %bb
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
; GFX7-NEXT: v_and_b32_e32 v0, 6, v0
; GFX7-NEXT: v_mul_u32_u24_e32 v0, 7, v0
; GFX7-NEXT: v_mov_b32_e32 v1, 0
Expand All @@ -251,7 +251,7 @@ define amdgpu_kernel void @mulu24_shl64(ptr addrspace(1) nocapture %arg) {
;
; GFX8-LABEL: mulu24_shl64:
; GFX8: ; %bb.0: ; %bb
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
; GFX8-NEXT: v_and_b32_e32 v0, 6, v0
; GFX8-NEXT: v_mul_u32_u24_e32 v0, 7, v0
; GFX8-NEXT: v_mov_b32_e32 v1, 0
Expand All @@ -266,7 +266,7 @@ define amdgpu_kernel void @mulu24_shl64(ptr addrspace(1) nocapture %arg) {
;
; GFX9-LABEL: mulu24_shl64:
; GFX9: ; %bb.0: ; %bb
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
; GFX9-NEXT: v_and_b32_e32 v0, 6, v0
; GFX9-NEXT: v_mul_u32_u24_e32 v0, 7, v0
; GFX9-NEXT: v_mov_b32_e32 v1, 0
Expand All @@ -281,7 +281,7 @@ define amdgpu_kernel void @mulu24_shl64(ptr addrspace(1) nocapture %arg) {
;
; GFX10-LABEL: mulu24_shl64:
; GFX10: ; %bb.0: ; %bb
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x0
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
; GFX10-NEXT: v_and_b32_e32 v0, 6, v0
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: v_mul_u32_u24_e32 v0, 7, v0
Expand All @@ -296,7 +296,7 @@ define amdgpu_kernel void @mulu24_shl64(ptr addrspace(1) nocapture %arg) {
;
; GFX11-LABEL: mulu24_shl64:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x0
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 6, v0
; GFX11-NEXT: v_mul_u32_u24_e32 v0, 7, v0
; GFX11-NEXT: v_lshlrev_b64 v[2:3], 2, v[0:1]
Expand All @@ -321,7 +321,7 @@ bb:
define amdgpu_kernel void @muli24_shl64(ptr addrspace(1) nocapture %arg, ptr addrspace(1) nocapture readonly %arg1) {
; GFX7-LABEL: muli24_shl64:
; GFX7: ; %bb.0: ; %bb
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GFX7-NEXT: v_mov_b32_e32 v2, 0
; GFX7-NEXT: s_mov_b32 s6, 0
Expand All @@ -340,7 +340,7 @@ define amdgpu_kernel void @muli24_shl64(ptr addrspace(1) nocapture %arg, ptr add
;
; GFX8-LABEL: muli24_shl64:
; GFX8: ; %bb.0: ; %bb
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
; GFX8-NEXT: v_lshlrev_b32_e32 v3, 2, v0
; GFX8-NEXT: v_lshlrev_b32_e32 v5, 3, v0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
Expand All @@ -363,7 +363,7 @@ define amdgpu_kernel void @muli24_shl64(ptr addrspace(1) nocapture %arg, ptr add
;
; GFX9-LABEL: muli24_shl64:
; GFX9: ; %bb.0: ; %bb
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 3, v0
Expand All @@ -378,7 +378,7 @@ define amdgpu_kernel void @muli24_shl64(ptr addrspace(1) nocapture %arg, ptr add
;
; GFX10-LABEL: muli24_shl64:
; GFX10: ; %bb.0: ; %bb
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x0
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
; GFX10-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GFX10-NEXT: v_mov_b32_e32 v2, 0
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 3, v0
Expand All @@ -393,16 +393,17 @@ define amdgpu_kernel void @muli24_shl64(ptr addrspace(1) nocapture %arg, ptr add
;
; GFX11-LABEL: muli24_shl64:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_lshlrev_b32 v1, 2, v0
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x0
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v2, 0x3ff, v0
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v2
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 3, v2
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: global_load_b32 v1, v1, s[2:3]
; GFX11-NEXT: global_load_b32 v0, v0, s[2:3]
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_or_b32_e32 v1, 0xff800000, v1
; GFX11-NEXT: v_mul_i32_i24_e32 v1, -7, v1
; GFX11-NEXT: v_lshlrev_b64 v[1:2], 3, v[1:2]
; GFX11-NEXT: global_store_b64 v0, v[1:2], s[0:1]
; GFX11-NEXT: v_or_b32_e32 v0, 0xff800000, v0
; GFX11-NEXT: v_mul_i32_i24_e32 v0, -7, v0
; GFX11-NEXT: v_lshlrev_b64 v[0:1], 3, v[0:1]
; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
; GFX11-NEXT: s_nop 0
; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-NEXT: s_endpgm
Expand Down
172 changes: 86 additions & 86 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.128.ll

Large diffs are not rendered by default.

172 changes: 86 additions & 86 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.96.ll

Large diffs are not rendered by default.

486 changes: 243 additions & 243 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll

Large diffs are not rendered by default.

196 changes: 98 additions & 98 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/vni8-across-blocks.ll

Large diffs are not rendered by default.

60 changes: 30 additions & 30 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/widen-i8-i16-scalar-loads.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
define amdgpu_kernel void @constant_load_i8_align4(ptr addrspace (1) %out, ptr addrspace(4) %in) #0 {
; GFX8-LABEL: constant_load_i8_align4:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_load_dword s2, s[2:3], 0x0
; GFX8-NEXT: v_mov_b32_e32 v0, s0
Expand All @@ -18,7 +18,7 @@ define amdgpu_kernel void @constant_load_i8_align4(ptr addrspace (1) %out, ptr a
;
; GFX9-LABEL: constant_load_i8_align4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_load_dword s2, s[2:3], 0x0
Expand All @@ -29,7 +29,7 @@ define amdgpu_kernel void @constant_load_i8_align4(ptr addrspace (1) %out, ptr a
;
; GFX10-LABEL: constant_load_i8_align4:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_load_dword s2, s[2:3], 0x0
Expand All @@ -45,7 +45,7 @@ define amdgpu_kernel void @constant_load_i8_align4(ptr addrspace (1) %out, ptr a
define amdgpu_kernel void @constant_load_i16_align4(ptr addrspace (1) %out, ptr addrspace(4) %in) #0 {
; GFX8-LABEL: constant_load_i16_align4:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_load_dword s2, s[2:3], 0x0
; GFX8-NEXT: v_mov_b32_e32 v0, s0
Expand All @@ -57,7 +57,7 @@ define amdgpu_kernel void @constant_load_i16_align4(ptr addrspace (1) %out, ptr
;
; GFX9-LABEL: constant_load_i16_align4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_load_dword s2, s[2:3], 0x0
Expand All @@ -68,7 +68,7 @@ define amdgpu_kernel void @constant_load_i16_align4(ptr addrspace (1) %out, ptr
;
; GFX10-LABEL: constant_load_i16_align4:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_load_dword s2, s[2:3], 0x0
Expand All @@ -84,7 +84,7 @@ define amdgpu_kernel void @constant_load_i16_align4(ptr addrspace (1) %out, ptr
define amdgpu_kernel void @sextload_i8_to_i32_align4(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX8-LABEL: sextload_i8_to_i32_align4:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_load_dword s2, s[2:3], 0x0
; GFX8-NEXT: v_mov_b32_e32 v0, s0
Expand All @@ -97,7 +97,7 @@ define amdgpu_kernel void @sextload_i8_to_i32_align4(ptr addrspace(1) %out, ptr
;
; GFX9-LABEL: sextload_i8_to_i32_align4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_load_dword s2, s[2:3], 0x0
Expand All @@ -109,7 +109,7 @@ define amdgpu_kernel void @sextload_i8_to_i32_align4(ptr addrspace(1) %out, ptr
;
; GFX10-LABEL: sextload_i8_to_i32_align4:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_load_dword s2, s[2:3], 0x0
Expand All @@ -127,7 +127,7 @@ define amdgpu_kernel void @sextload_i8_to_i32_align4(ptr addrspace(1) %out, ptr
define amdgpu_kernel void @sextload_i16_to_i32_align4(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX8-LABEL: sextload_i16_to_i32_align4:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_load_dword s2, s[2:3], 0x0
; GFX8-NEXT: v_mov_b32_e32 v0, s0
Expand All @@ -140,7 +140,7 @@ define amdgpu_kernel void @sextload_i16_to_i32_align4(ptr addrspace(1) %out, ptr
;
; GFX9-LABEL: sextload_i16_to_i32_align4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_load_dword s2, s[2:3], 0x0
Expand All @@ -152,7 +152,7 @@ define amdgpu_kernel void @sextload_i16_to_i32_align4(ptr addrspace(1) %out, ptr
;
; GFX10-LABEL: sextload_i16_to_i32_align4:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_load_dword s2, s[2:3], 0x0
Expand All @@ -170,7 +170,7 @@ define amdgpu_kernel void @sextload_i16_to_i32_align4(ptr addrspace(1) %out, ptr
define amdgpu_kernel void @zextload_i8_to_i32_align4(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX8-LABEL: zextload_i8_to_i32_align4:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_load_dword s2, s[2:3], 0x0
; GFX8-NEXT: v_mov_b32_e32 v0, s0
Expand All @@ -183,7 +183,7 @@ define amdgpu_kernel void @zextload_i8_to_i32_align4(ptr addrspace(1) %out, ptr
;
; GFX9-LABEL: zextload_i8_to_i32_align4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_load_dword s2, s[2:3], 0x0
Expand All @@ -195,7 +195,7 @@ define amdgpu_kernel void @zextload_i8_to_i32_align4(ptr addrspace(1) %out, ptr
;
; GFX10-LABEL: zextload_i8_to_i32_align4:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_load_dword s2, s[2:3], 0x0
Expand All @@ -213,7 +213,7 @@ define amdgpu_kernel void @zextload_i8_to_i32_align4(ptr addrspace(1) %out, ptr
define amdgpu_kernel void @zextload_i16_to_i32_align4(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX8-LABEL: zextload_i16_to_i32_align4:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_load_dword s2, s[2:3], 0x0
; GFX8-NEXT: v_mov_b32_e32 v0, s0
Expand All @@ -226,7 +226,7 @@ define amdgpu_kernel void @zextload_i16_to_i32_align4(ptr addrspace(1) %out, ptr
;
; GFX9-LABEL: zextload_i16_to_i32_align4:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_load_dword s2, s[2:3], 0x0
Expand All @@ -238,7 +238,7 @@ define amdgpu_kernel void @zextload_i16_to_i32_align4(ptr addrspace(1) %out, ptr
;
; GFX10-LABEL: zextload_i16_to_i32_align4:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_load_dword s2, s[2:3], 0x0
Expand All @@ -256,7 +256,7 @@ define amdgpu_kernel void @zextload_i16_to_i32_align4(ptr addrspace(1) %out, ptr
define amdgpu_kernel void @constant_load_i8_align2(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX8-LABEL: constant_load_i8_align2:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
Expand All @@ -269,7 +269,7 @@ define amdgpu_kernel void @constant_load_i8_align2(ptr addrspace(1) %out, ptr ad
;
; GFX9-LABEL: constant_load_i8_align2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_ubyte v1, v0, s[2:3]
Expand All @@ -279,7 +279,7 @@ define amdgpu_kernel void @constant_load_i8_align2(ptr addrspace(1) %out, ptr ad
;
; GFX10-LABEL: constant_load_i8_align2:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_load_ubyte v1, v0, s[2:3]
Expand All @@ -294,7 +294,7 @@ define amdgpu_kernel void @constant_load_i8_align2(ptr addrspace(1) %out, ptr ad
define amdgpu_kernel void @constant_load_i16_align2(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX8-LABEL: constant_load_i16_align2:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
Expand All @@ -307,7 +307,7 @@ define amdgpu_kernel void @constant_load_i16_align2(ptr addrspace(1) %out, ptr a
;
; GFX9-LABEL: constant_load_i16_align2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_ushort v1, v0, s[2:3]
Expand All @@ -317,7 +317,7 @@ define amdgpu_kernel void @constant_load_i16_align2(ptr addrspace(1) %out, ptr a
;
; GFX10-LABEL: constant_load_i16_align2:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_load_ushort v1, v0, s[2:3]
Expand All @@ -332,7 +332,7 @@ define amdgpu_kernel void @constant_load_i16_align2(ptr addrspace(1) %out, ptr a
define amdgpu_kernel void @constant_sextload_i8_align2(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX8-LABEL: constant_sextload_i8_align2:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
Expand All @@ -351,7 +351,7 @@ define amdgpu_kernel void @constant_sextload_i8_align2(ptr addrspace(1) %out, pt
;
; GFX9-LABEL: constant_sextload_i8_align2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_sbyte v1, v0, s[2:3]
Expand All @@ -362,7 +362,7 @@ define amdgpu_kernel void @constant_sextload_i8_align2(ptr addrspace(1) %out, pt
;
; GFX10-LABEL: constant_sextload_i8_align2:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_load_sbyte v1, v0, s[2:3]
Expand All @@ -379,7 +379,7 @@ define amdgpu_kernel void @constant_sextload_i8_align2(ptr addrspace(1) %out, pt
define amdgpu_kernel void @constant_zextload_i8_align2(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
; GFX8-LABEL: constant_zextload_i8_align2:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
Expand All @@ -398,7 +398,7 @@ define amdgpu_kernel void @constant_zextload_i8_align2(ptr addrspace(1) %out, pt
;
; GFX9-LABEL: constant_zextload_i8_align2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_ubyte v1, v0, s[2:3]
Expand All @@ -409,7 +409,7 @@ define amdgpu_kernel void @constant_zextload_i8_align2(ptr addrspace(1) %out, pt
;
; GFX10-LABEL: constant_zextload_i8_align2:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_load_ubyte v1, v0, s[2:3]
Expand Down
Loading