98 changes: 49 additions & 49 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll

Large diffs are not rendered by default.

86 changes: 43 additions & 43 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.set.inactive.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@
define amdgpu_kernel void @set_inactive(ptr addrspace(1) %out, i32 %in) {
; GCN-LABEL: set_inactive:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dword s4, s[2:3], 0x2c
; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GCN-NEXT: s_load_dword s3, s[4:5], 0x2c
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v1, s4
; GCN-NEXT: v_mov_b32_e32 v1, s3
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT: v_cndmask_b32_e64 v0, 42, v1, s[4:5]
; GCN-NEXT: s_mov_b64 exec, s[4:5]
Expand All @@ -25,7 +25,7 @@ define amdgpu_kernel void @set_inactive(ptr addrspace(1) %out, i32 %in) {
define amdgpu_kernel void @set_inactive_imm_poison(ptr addrspace(1) %out) {
; GCN-LABEL: set_inactive_imm_poison:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GCN-NEXT: v_mov_b32_e32 v0, 1
; GCN-NEXT: v_mov_b32_e32 v0, v0
; GCN-NEXT: s_mov_b32 s2, -1
Expand All @@ -42,7 +42,7 @@ define amdgpu_kernel void @set_inactive_imm_poison(ptr addrspace(1) %out) {
define amdgpu_kernel void @set_inactive_64(ptr addrspace(1) %out, i64 %in) {
; GCN-LABEL: set_inactive_64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v2, s2
; GCN-NEXT: v_mov_b32_e32 v3, s3
Expand All @@ -65,7 +65,7 @@ define amdgpu_kernel void @set_inactive_64(ptr addrspace(1) %out, i64 %in) {
define amdgpu_kernel void @set_inactive_imm_poison_64(ptr addrspace(1) %out) {
; GCN-LABEL: set_inactive_imm_poison_64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GCN-NEXT: v_mov_b32_e32 v0, 1
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: v_mov_b32_e32 v0, v0
Expand All @@ -84,17 +84,17 @@ define amdgpu_kernel void @set_inactive_imm_poison_64(ptr addrspace(1) %out) {
define amdgpu_kernel void @set_inactive_scc(ptr addrspace(1) %out, i32 %in, <4 x i32> inreg %desc) {
; GCN-LABEL: set_inactive_scc:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x34
; GCN-NEXT: s_load_dword s8, s[2:3], 0x2c
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
; GCN-NEXT: s_load_dword s6, s[4:5], 0x2c
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_buffer_load_dword s4, s[4:7], 0x0
; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GCN-NEXT: v_mov_b32_e32 v1, s8
; GCN-NEXT: s_buffer_load_dword s7, s[0:3], 0x0
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GCN-NEXT: v_mov_b32_e32 v1, s6
; GCN-NEXT: s_or_saveexec_b64 s[2:3], -1
; GCN-NEXT: v_cndmask_b32_e64 v0, 42, v1, s[2:3]
; GCN-NEXT: s_mov_b64 exec, s[2:3]
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_cmp_lg_u32 s4, 56
; GCN-NEXT: s_cmp_lg_u32 s7, 56
; GCN-NEXT: v_mov_b32_e32 v1, v0
; GCN-NEXT: s_mov_b32 s2, 1
; GCN-NEXT: s_cbranch_scc0 .LBB4_2
Expand Down Expand Up @@ -137,14 +137,14 @@ define amdgpu_kernel void @set_inactive_scc(ptr addrspace(1) %out, i32 %in, <4 x
define amdgpu_kernel void @set_inactive_f32(ptr addrspace(1) %out, float %in) {
; GCN-LABEL: set_inactive_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dword s4, s[2:3], 0x2c
; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GCN-NEXT: s_load_dword s6, s[4:5], 0x2c
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GCN-NEXT: s_or_saveexec_b64 s[2:3], -1
; GCN-NEXT: v_mov_b32_e32 v0, 0x40400000
; GCN-NEXT: s_mov_b64 exec, s[2:3]
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v1, s4
; GCN-NEXT: v_mov_b32_e32 v1, s6
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
; GCN-NEXT: s_mov_b64 exec, s[4:5]
Expand All @@ -161,7 +161,7 @@ define amdgpu_kernel void @set_inactive_f32(ptr addrspace(1) %out, float %in) {
define amdgpu_kernel void @set_inactive_f64(ptr addrspace(1) %out, double %in) {
; GCN-LABEL: set_inactive_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT: v_mov_b32_e32 v0, 0xcccccccd
; GCN-NEXT: v_mov_b32_e32 v1, 0x4010cccc
Expand All @@ -188,14 +188,14 @@ define amdgpu_kernel void @set_inactive_f64(ptr addrspace(1) %out, double %in) {
define amdgpu_kernel void @set_inactive_v2i16(ptr addrspace(1) %out, <2 x i16> %in) {
; GCN-LABEL: set_inactive_v2i16:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dword s4, s[2:3], 0x2c
; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GCN-NEXT: s_load_dword s6, s[4:5], 0x2c
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GCN-NEXT: s_or_saveexec_b64 s[2:3], -1
; GCN-NEXT: v_mov_b32_e32 v0, 0x10001
; GCN-NEXT: s_mov_b64 exec, s[2:3]
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v1, s4
; GCN-NEXT: v_mov_b32_e32 v1, s6
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
; GCN-NEXT: s_mov_b64 exec, s[4:5]
Expand All @@ -212,14 +212,14 @@ define amdgpu_kernel void @set_inactive_v2i16(ptr addrspace(1) %out, <2 x i16> %
define amdgpu_kernel void @set_inactive_v2f16(ptr addrspace(1) %out, <2 x half> %in) {
; GCN-LABEL: set_inactive_v2f16:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dword s4, s[2:3], 0x2c
; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GCN-NEXT: s_load_dword s6, s[4:5], 0x2c
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GCN-NEXT: s_or_saveexec_b64 s[2:3], -1
; GCN-NEXT: v_mov_b32_e32 v0, 0x3c003c00
; GCN-NEXT: s_mov_b64 exec, s[2:3]
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v1, s4
; GCN-NEXT: v_mov_b32_e32 v1, s6
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
; GCN-NEXT: s_mov_b64 exec, s[4:5]
Expand All @@ -236,7 +236,7 @@ define amdgpu_kernel void @set_inactive_v2f16(ptr addrspace(1) %out, <2 x half>
define amdgpu_kernel void @set_inactive_v2i32(ptr addrspace(1) %out, <2 x i32> %in) {
; GCN-LABEL: set_inactive_v2i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v2, s2
; GCN-NEXT: v_mov_b32_e32 v3, s3
Expand All @@ -259,7 +259,7 @@ define amdgpu_kernel void @set_inactive_v2i32(ptr addrspace(1) %out, <2 x i32> %
define amdgpu_kernel void @set_inactive_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
; GCN-LABEL: set_inactive_v2f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v2, s2
; GCN-NEXT: v_mov_b32_e32 v3, s3
Expand All @@ -282,14 +282,14 @@ define amdgpu_kernel void @set_inactive_v2f32(ptr addrspace(1) %out, <2 x float>
define amdgpu_kernel void @set_inactive_v2bf16(ptr addrspace(1) %out, <2 x bfloat> %in) {
; GCN-LABEL: set_inactive_v2bf16:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dword s4, s[2:3], 0x2c
; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GCN-NEXT: s_load_dword s6, s[4:5], 0x2c
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GCN-NEXT: s_or_saveexec_b64 s[2:3], -1
; GCN-NEXT: v_mov_b32_e32 v0, 0x3f803f80
; GCN-NEXT: s_mov_b64 exec, s[2:3]
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v1, s4
; GCN-NEXT: v_mov_b32_e32 v1, s6
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT: v_cndmask_b32_e64 v0, v0, v1, s[4:5]
; GCN-NEXT: s_mov_b64 exec, s[4:5]
Expand All @@ -306,7 +306,7 @@ define amdgpu_kernel void @set_inactive_v2bf16(ptr addrspace(1) %out, <2 x bfloa
define amdgpu_kernel void @set_inactive_v4i16(ptr addrspace(1) %out, <4 x i16> %in) {
; GCN-LABEL: set_inactive_v4i16:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT: v_mov_b32_e32 v0, 0x10001
; GCN-NEXT: s_mov_b64 exec, s[4:5]
Expand All @@ -332,7 +332,7 @@ define amdgpu_kernel void @set_inactive_v4i16(ptr addrspace(1) %out, <4 x i16> %
define amdgpu_kernel void @set_inactive_v4f16(ptr addrspace(1) %out, <4 x half> %in) {
; GCN-LABEL: set_inactive_v4f16:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT: v_mov_b32_e32 v0, 0x3c003c00
; GCN-NEXT: s_mov_b64 exec, s[4:5]
Expand All @@ -358,7 +358,7 @@ define amdgpu_kernel void @set_inactive_v4f16(ptr addrspace(1) %out, <4 x half>
define amdgpu_kernel void @set_inactive_v4bf16(ptr addrspace(1) %out, <4 x bfloat> %in) {
; GCN-LABEL: set_inactive_v4bf16:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT: v_mov_b32_e32 v0, 0x3f803f80
; GCN-NEXT: s_mov_b64 exec, s[4:5]
Expand All @@ -384,7 +384,7 @@ define amdgpu_kernel void @set_inactive_v4bf16(ptr addrspace(1) %out, <4 x bfloa
define amdgpu_kernel void @set_inactive_p0(ptr addrspace(1) %out, ptr %in) {
; GCN-LABEL: set_inactive_p0:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v2, s2
; GCN-NEXT: v_mov_b32_e32 v3, s3
Expand All @@ -407,11 +407,11 @@ define amdgpu_kernel void @set_inactive_p0(ptr addrspace(1) %out, ptr %in) {
define amdgpu_kernel void @set_inactive_p2(ptr addrspace(1) %out, ptr addrspace(2) %in) {
; GCN-LABEL: set_inactive_p2:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dword s4, s[2:3], 0x2c
; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GCN-NEXT: s_load_dword s3, s[4:5], 0x2c
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v1, s4
; GCN-NEXT: v_mov_b32_e32 v1, s3
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, v1, s[4:5]
; GCN-NEXT: s_mov_b64 exec, s[4:5]
Expand All @@ -428,11 +428,11 @@ define amdgpu_kernel void @set_inactive_p2(ptr addrspace(1) %out, ptr addrspace(
define amdgpu_kernel void @set_inactive_p3(ptr addrspace(1) %out, ptr addrspace(3) %in) {
; GCN-LABEL: set_inactive_p3:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dword s4, s[2:3], 0x2c
; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GCN-NEXT: s_load_dword s3, s[4:5], 0x2c
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v1, s4
; GCN-NEXT: v_mov_b32_e32 v1, s3
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, v1, s[4:5]
; GCN-NEXT: s_mov_b64 exec, s[4:5]
Expand All @@ -449,11 +449,11 @@ define amdgpu_kernel void @set_inactive_p3(ptr addrspace(1) %out, ptr addrspace(
define amdgpu_kernel void @set_inactive_p5(ptr addrspace(1) %out, ptr addrspace(5) %in) {
; GCN-LABEL: set_inactive_p5:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dword s4, s[2:3], 0x2c
; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GCN-NEXT: s_load_dword s3, s[4:5], 0x2c
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v1, s4
; GCN-NEXT: v_mov_b32_e32 v1, s3
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, v1, s[4:5]
; GCN-NEXT: s_mov_b64 exec, s[4:5]
Expand All @@ -470,11 +470,11 @@ define amdgpu_kernel void @set_inactive_p5(ptr addrspace(1) %out, ptr addrspace(
define amdgpu_kernel void @set_inactive_p6(ptr addrspace(1) %out, ptr addrspace(6) %in) {
; GCN-LABEL: set_inactive_p6:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dword s4, s[2:3], 0x2c
; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GCN-NEXT: s_load_dword s3, s[4:5], 0x2c
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v1, s4
; GCN-NEXT: v_mov_b32_e32 v1, s3
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, v1, s[4:5]
; GCN-NEXT: s_mov_b64 exec, s[4:5]
Expand Down
30 changes: 15 additions & 15 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.trig.preop.ll
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ define double @v_trig_preop_f64_imm(double %a, i32 %b) {
define amdgpu_kernel void @s_trig_preop_f64(double %a, i32 %b) {
; CI-LABEL: s_trig_preop_f64:
; CI: ; %bb.0:
; CI-NEXT: s_load_dword s2, s[6:7], 0x2
; CI-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
; CI-NEXT: s_load_dword s2, s[8:9], 0x2
; CI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: v_mov_b32_e32 v0, s2
; CI-NEXT: v_trig_preop_f64 v[0:1], s[0:1], v0
Expand All @@ -57,8 +57,8 @@ define amdgpu_kernel void @s_trig_preop_f64(double %a, i32 %b) {
;
; VI-LABEL: s_trig_preop_f64:
; VI: ; %bb.0:
; VI-NEXT: s_load_dword s2, s[6:7], 0x8
; VI-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
; VI-NEXT: s_load_dword s2, s[8:9], 0x8
; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_mov_b32_e32 v0, s2
; VI-NEXT: v_trig_preop_f64 v[0:1], s[0:1], v0
Expand All @@ -74,8 +74,8 @@ define amdgpu_kernel void @s_trig_preop_f64(double %a, i32 %b) {
;
; GFX9-LABEL: s_trig_preop_f64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dword s2, s[6:7], 0x8
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
; GFX9-NEXT: s_load_dword s2, s[8:9], 0x8
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s2
; GFX9-NEXT: v_trig_preop_f64 v[0:1], s[0:1], v0
Expand All @@ -86,8 +86,8 @@ define amdgpu_kernel void @s_trig_preop_f64(double %a, i32 %b) {
; GFX10-LABEL: s_trig_preop_f64:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
; GFX10-NEXT: s_load_dword s2, s[6:7], 0x8
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX10-NEXT: s_load_dword s2, s[8:9], 0x8
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_trig_preop_f64 v[0:1], s[0:1], s2
; GFX10-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
Expand All @@ -97,8 +97,8 @@ define amdgpu_kernel void @s_trig_preop_f64(double %a, i32 %b) {
; GFX11-LABEL: s_trig_preop_f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x0
; GFX11-NEXT: s_load_b32 s2, s[2:3], 0x8
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x8
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_trig_preop_f64 v[0:1], s[0:1], s2
; GFX11-NEXT: flat_store_b64 v[0:1], v[0:1] dlc
Expand All @@ -112,7 +112,7 @@ define amdgpu_kernel void @s_trig_preop_f64(double %a, i32 %b) {
define amdgpu_kernel void @s_trig_preop_f64_imm(double %a, i32 %b) {
; CI-LABEL: s_trig_preop_f64_imm:
; CI: ; %bb.0:
; CI-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
; CI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; CI-NEXT: s_waitcnt lgkmcnt(0)
; CI-NEXT: v_trig_preop_f64 v[0:1], s[0:1], 7
; CI-NEXT: s_add_u32 s0, s0, 4
Expand All @@ -127,7 +127,7 @@ define amdgpu_kernel void @s_trig_preop_f64_imm(double %a, i32 %b) {
;
; VI-LABEL: s_trig_preop_f64_imm:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_trig_preop_f64 v[0:1], s[0:1], 7
; VI-NEXT: s_add_u32 s0, s0, 4
Expand All @@ -142,7 +142,7 @@ define amdgpu_kernel void @s_trig_preop_f64_imm(double %a, i32 %b) {
;
; GFX9-LABEL: s_trig_preop_f64_imm:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_trig_preop_f64 v[0:1], s[0:1], 7
; GFX9-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
Expand All @@ -151,7 +151,7 @@ define amdgpu_kernel void @s_trig_preop_f64_imm(double %a, i32 %b) {
;
; GFX10-LABEL: s_trig_preop_f64_imm:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_trig_preop_f64 v[0:1], s[0:1], 7
; GFX10-NEXT: flat_store_dwordx2 v[0:1], v[0:1]
Expand All @@ -160,7 +160,7 @@ define amdgpu_kernel void @s_trig_preop_f64_imm(double %a, i32 %b) {
;
; GFX11-LABEL: s_trig_preop_f64_imm:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x0
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_trig_preop_f64 v[0:1], s[0:1], 7
; GFX11-NEXT: flat_store_b64 v[0:1], v[0:1] dlc
Expand Down
132 changes: 66 additions & 66 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ubfe.ll

Large diffs are not rendered by default.

98 changes: 49 additions & 49 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.update.dpp.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
define amdgpu_kernel void @dpp_test(ptr addrspace(1) %out, i32 %in1, i32 %in2) {
; GFX8-LABEL: dpp_test:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v2, s2
; GFX8-NEXT: v_mov_b32_e32 v0, s3
Expand All @@ -19,18 +19,18 @@ define amdgpu_kernel void @dpp_test(ptr addrspace(1) %out, i32 %in1, i32 %in2) {
;
; GFX10-LABEL: dpp_test:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, s6
; GFX10-NEXT: v_mov_b32_e32 v1, s7
; GFX10-NEXT: v_mov_b32_e32 v0, s2
; GFX10-NEXT: v_mov_b32_e32 v1, s3
; GFX10-NEXT: v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: global_store_dword v1, v0, s[4:5]
; GFX10-NEXT: global_store_dword v1, v0, s[0:1]
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: dpp_test:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX11-NEXT: v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
Expand All @@ -46,7 +46,7 @@ define amdgpu_kernel void @dpp_test(ptr addrspace(1) %out, i32 %in1, i32 %in2) {
define amdgpu_kernel void @update_dppi64_test(ptr addrspace(1) %arg, i64 %in1, i64 %in2) {
; GFX8-LABEL: update_dppi64_test:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 3, v0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s0
Expand All @@ -64,21 +64,21 @@ define amdgpu_kernel void @update_dppi64_test(ptr addrspace(1) %arg, i64 %in1, i
;
; GFX10-LABEL: update_dppi64_test:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 3, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_load_dwordx2 v[0:1], v4, s[4:5]
; GFX10-NEXT: v_mov_b32_e32 v2, s6
; GFX10-NEXT: v_mov_b32_e32 v3, s7
; GFX10-NEXT: global_load_dwordx2 v[0:1], v4, s[0:1]
; GFX10-NEXT: v_mov_b32_e32 v2, s2
; GFX10-NEXT: v_mov_b32_e32 v3, s3
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mov_b32_dpp v2, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; GFX10-NEXT: v_mov_b32_dpp v3, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; GFX10-NEXT: global_store_dwordx2 v4, v[2:3], s[4:5]
; GFX10-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: update_dppi64_test:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
Expand All @@ -102,7 +102,7 @@ define amdgpu_kernel void @update_dppi64_test(ptr addrspace(1) %arg, i64 %in1, i
define amdgpu_kernel void @update_dppf64_test(ptr addrspace(1) %arg, double %in1, double %in2) {
; GFX8-LABEL: update_dppf64_test:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 3, v0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s0
Expand All @@ -120,21 +120,21 @@ define amdgpu_kernel void @update_dppf64_test(ptr addrspace(1) %arg, double %in1
;
; GFX10-LABEL: update_dppf64_test:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 3, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_load_dwordx2 v[0:1], v4, s[4:5]
; GFX10-NEXT: v_mov_b32_e32 v2, s6
; GFX10-NEXT: v_mov_b32_e32 v3, s7
; GFX10-NEXT: global_load_dwordx2 v[0:1], v4, s[0:1]
; GFX10-NEXT: v_mov_b32_e32 v2, s2
; GFX10-NEXT: v_mov_b32_e32 v3, s3
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mov_b32_dpp v2, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; GFX10-NEXT: v_mov_b32_dpp v3, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; GFX10-NEXT: global_store_dwordx2 v4, v[2:3], s[4:5]
; GFX10-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: update_dppf64_test:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
Expand All @@ -158,7 +158,7 @@ define amdgpu_kernel void @update_dppf64_test(ptr addrspace(1) %arg, double %in1
define amdgpu_kernel void @update_dppv2i32_test(ptr addrspace(1) %arg, <2 x i32> %in1, <2 x i32> %in2) {
; GFX8-LABEL: update_dppv2i32_test:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 3, v0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s0
Expand All @@ -176,21 +176,21 @@ define amdgpu_kernel void @update_dppv2i32_test(ptr addrspace(1) %arg, <2 x i32>
;
; GFX10-LABEL: update_dppv2i32_test:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 3, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_load_dwordx2 v[0:1], v4, s[4:5]
; GFX10-NEXT: v_mov_b32_e32 v2, s6
; GFX10-NEXT: v_mov_b32_e32 v3, s7
; GFX10-NEXT: global_load_dwordx2 v[0:1], v4, s[0:1]
; GFX10-NEXT: v_mov_b32_e32 v2, s2
; GFX10-NEXT: v_mov_b32_e32 v3, s3
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mov_b32_dpp v2, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; GFX10-NEXT: v_mov_b32_dpp v3, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; GFX10-NEXT: global_store_dwordx2 v4, v[2:3], s[4:5]
; GFX10-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: update_dppv2i32_test:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
Expand All @@ -214,7 +214,7 @@ define amdgpu_kernel void @update_dppv2i32_test(ptr addrspace(1) %arg, <2 x i32>
define amdgpu_kernel void @update_dppv2f32_test(ptr addrspace(1) %arg, <2 x float> %in1, <2 x float> %in2) {
; GFX8-LABEL: update_dppv2f32_test:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 3, v0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s0
Expand All @@ -232,21 +232,21 @@ define amdgpu_kernel void @update_dppv2f32_test(ptr addrspace(1) %arg, <2 x floa
;
; GFX10-LABEL: update_dppv2f32_test:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 3, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_load_dwordx2 v[0:1], v4, s[4:5]
; GFX10-NEXT: v_mov_b32_e32 v2, s6
; GFX10-NEXT: v_mov_b32_e32 v3, s7
; GFX10-NEXT: global_load_dwordx2 v[0:1], v4, s[0:1]
; GFX10-NEXT: v_mov_b32_e32 v2, s2
; GFX10-NEXT: v_mov_b32_e32 v3, s3
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mov_b32_dpp v2, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; GFX10-NEXT: v_mov_b32_dpp v3, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; GFX10-NEXT: global_store_dwordx2 v4, v[2:3], s[4:5]
; GFX10-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: update_dppv2f32_test:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
Expand All @@ -270,7 +270,7 @@ define amdgpu_kernel void @update_dppv2f32_test(ptr addrspace(1) %arg, <2 x floa
define amdgpu_kernel void @update_dpp_p0_test(ptr addrspace(1) %arg, ptr %in1, ptr %in2) {
; GFX8-LABEL: update_dpp_p0_test:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x24
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: v_lshlrev_b32_e32 v2, 3, v0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v0, s0
Expand All @@ -288,21 +288,21 @@ define amdgpu_kernel void @update_dpp_p0_test(ptr addrspace(1) %arg, ptr %in1, p
;
; GFX10-LABEL: update_dpp_p0_test:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 3, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: global_load_dwordx2 v[0:1], v4, s[4:5]
; GFX10-NEXT: v_mov_b32_e32 v2, s6
; GFX10-NEXT: v_mov_b32_e32 v3, s7
; GFX10-NEXT: global_load_dwordx2 v[0:1], v4, s[0:1]
; GFX10-NEXT: v_mov_b32_e32 v2, s2
; GFX10-NEXT: v_mov_b32_e32 v3, s3
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_mov_b32_dpp v2, v0 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; GFX10-NEXT: v_mov_b32_dpp v3, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
; GFX10-NEXT: global_store_dwordx2 v4, v[2:3], s[4:5]
; GFX10-NEXT: global_store_dwordx2 v4, v[2:3], s[0:1]
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: update_dpp_p0_test:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3
Expand All @@ -326,7 +326,7 @@ define amdgpu_kernel void @update_dpp_p0_test(ptr addrspace(1) %arg, ptr %in1, p
define amdgpu_kernel void @update_dpp_p3_test(ptr addrspace(3) %arg, ptr addrspace(3) %in1, ptr %in2) {
; GFX8-LABEL: update_dpp_p3_test:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX8-NEXT: s_mov_b32 m0, -1
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
Expand All @@ -341,7 +341,7 @@ define amdgpu_kernel void @update_dpp_p3_test(ptr addrspace(3) %arg, ptr addrspa
;
; GFX10-LABEL: update_dpp_p3_test:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_add_nc_u32_e32 v0, s0, v0
Expand All @@ -354,7 +354,7 @@ define amdgpu_kernel void @update_dpp_p3_test(ptr addrspace(3) %arg, ptr addrspa
;
; GFX11-LABEL: update_dpp_p3_test:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
Expand All @@ -377,11 +377,11 @@ define amdgpu_kernel void @update_dpp_p5_test(ptr addrspace(5) %arg, ptr addrspa
; GFX8-LABEL: update_dpp_p5_test:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_mov_b32 s88, SCRATCH_RSRC_DWORD0
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8-NEXT: s_mov_b32 s89, SCRATCH_RSRC_DWORD1
; GFX8-NEXT: s_mov_b32 s90, -1
; GFX8-NEXT: s_mov_b32 s91, 0xe80000
; GFX8-NEXT: s_add_u32 s88, s88, s9
; GFX8-NEXT: s_add_u32 s88, s88, s11
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX8-NEXT: s_addc_u32 s89, s89, 0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
Expand All @@ -396,13 +396,13 @@ define amdgpu_kernel void @update_dpp_p5_test(ptr addrspace(5) %arg, ptr addrspa
;
; GFX10-LABEL: update_dpp_p5_test:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX10-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GFX10-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
; GFX10-NEXT: s_mov_b32 s14, -1
; GFX10-NEXT: s_mov_b32 s15, 0x31c16000
; GFX10-NEXT: s_add_u32 s12, s12, s9
; GFX10-NEXT: s_add_u32 s12, s12, s11
; GFX10-NEXT: s_addc_u32 s13, s13, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_add_nc_u32_e32 v0, s0, v0
Expand All @@ -415,7 +415,7 @@ define amdgpu_kernel void @update_dpp_p5_test(ptr addrspace(5) %arg, ptr addrspa
;
; GFX11-LABEL: update_dpp_p5_test:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x24
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/localizer.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
define amdgpu_kernel void @localize_constants(i1 %cond) {
; GFX9-LABEL: localize_constants:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_load_dword s1, s[6:7], 0x0
; GFX9-NEXT: s_load_dword s1, s[8:9], 0x0
; GFX9-NEXT: s_mov_b32 s0, 1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_xor_b32 s1, s1, 1
Expand Down Expand Up @@ -95,7 +95,7 @@ bb2:
define amdgpu_kernel void @localize_globals(i1 %cond) {
; GFX9-LABEL: localize_globals:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_load_dword s1, s[6:7], 0x0
; GFX9-NEXT: s_load_dword s1, s[8:9], 0x0
; GFX9-NEXT: s_mov_b32 s0, 1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_xor_b32 s1, s1, 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ define float @test_fmamix_constant_bus_violation_sss(i32 inreg %val.0, i32 inreg
; CHECK-LABEL: test_fmamix_constant_bus_violation_sss:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_lshr_b32 s4, s6, 16
; CHECK-NEXT: s_lshr_b32 s5, s7, 16
; CHECK-NEXT: s_lshr_b32 s6, s16, 16
; CHECK-NEXT: s_lshr_b32 s5, s17, 16
; CHECK-NEXT: s_lshr_b32 s6, s18, 16
; CHECK-NEXT: s_lshr_b32 s4, s16, 16
; CHECK-NEXT: v_mov_b32_e32 v0, s5
; CHECK-NEXT: v_mov_b32_e32 v1, s6
; CHECK-NEXT: v_mad_mix_f32 v0, s4, v0, v1 op_sel_hi:[1,1,1]
Expand All @@ -32,8 +32,8 @@ define float @test_fmamix_constant_bus_violation_ssv(i32 inreg %val.0, i32 inreg
; CHECK-LABEL: test_fmamix_constant_bus_violation_ssv:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_lshr_b32 s5, s7, 16
; CHECK-NEXT: s_lshr_b32 s4, s6, 16
; CHECK-NEXT: s_lshr_b32 s5, s17, 16
; CHECK-NEXT: s_lshr_b32 s4, s16, 16
; CHECK-NEXT: v_mov_b32_e32 v1, s5
; CHECK-NEXT: v_mad_mix_f32 v0, s4, v1, v0 op_sel:[0,0,1] op_sel_hi:[1,1,1]
; CHECK-NEXT: s_setpc_b64 s[30:31]
Expand All @@ -57,8 +57,8 @@ define float @test_fmamix_constant_bus_violation_svs(i32 inreg %val.0, i32 %val.
; CHECK-LABEL: test_fmamix_constant_bus_violation_svs:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_lshr_b32 s5, s7, 16
; CHECK-NEXT: s_lshr_b32 s4, s6, 16
; CHECK-NEXT: s_lshr_b32 s5, s17, 16
; CHECK-NEXT: s_lshr_b32 s4, s16, 16
; CHECK-NEXT: v_mov_b32_e32 v1, s5
; CHECK-NEXT: v_mad_mix_f32 v0, s4, v0, v1 op_sel:[0,1,0] op_sel_hi:[1,1,1]
; CHECK-NEXT: s_setpc_b64 s[30:31]
Expand All @@ -82,8 +82,8 @@ define float @test_fmamix_constant_bus_violation_vss(i32 %val.0, i32 inreg %val.
; CHECK-LABEL: test_fmamix_constant_bus_violation_vss:
; CHECK: ; %bb.0:
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_lshr_b32 s5, s7, 16
; CHECK-NEXT: s_lshr_b32 s4, s6, 16
; CHECK-NEXT: s_lshr_b32 s5, s17, 16
; CHECK-NEXT: s_lshr_b32 s4, s16, 16
; CHECK-NEXT: v_mov_b32_e32 v1, s5
; CHECK-NEXT: v_mad_mix_f32 v0, v0, s4, v1 op_sel:[1,0,0] op_sel_hi:[1,1,1]
; CHECK-NEXT: s_setpc_b64 s[30:31]
Expand Down
222 changes: 111 additions & 111 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/mul-known-bits.i64.ll

Large diffs are not rendered by default.

122 changes: 61 additions & 61 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/mul.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2542,7 +2542,7 @@ define amdgpu_ps void @s_mul_u64_zext_with_vregs(ptr addrspace(1) %out, ptr addr
define amdgpu_kernel void @s_mul_u64_zext_with_sregs(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GFX7-LABEL: s_mul_u64_zext_with_sregs:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX7-NEXT: v_mov_b32_e32 v0, 0x50
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_load_dword s3, s[2:3], 0x0
Expand All @@ -2559,52 +2559,52 @@ define amdgpu_kernel void @s_mul_u64_zext_with_sregs(ptr addrspace(1) %out, ptr
;
; GFX8-LABEL: s_mul_u64_zext_with_sregs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: v_mov_b32_e32 v0, 0x50
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_load_dword s0, s[6:7], 0x0
; GFX8-NEXT: v_mov_b32_e32 v2, s4
; GFX8-NEXT: v_mov_b32_e32 v3, s5
; GFX8-NEXT: s_load_dword s2, s[2:3], 0x0
; GFX8-NEXT: v_mov_b32_e32 v3, s1
; GFX8-NEXT: v_mov_b32_e32 v2, s0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mul_hi_u32 v0, s0, v0
; GFX8-NEXT: s_mulk_i32 s0, 0x50
; GFX8-NEXT: v_readfirstlane_b32 s1, v0
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: v_mul_hi_u32 v0, s2, v0
; GFX8-NEXT: s_mulk_i32 s2, 0x50
; GFX8-NEXT: v_readfirstlane_b32 s3, v0
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GFX8-NEXT: s_endpgm
;
; GFX9-LABEL: s_mul_u64_zext_with_sregs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_load_dword s1, s[6:7], 0x0
; GFX9-NEXT: s_load_dword s3, s[2:3], 0x0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_mul_i32 s0, s1, 0x50
; GFX9-NEXT: s_mul_hi_u32 s1, s1, 0x50
; GFX9-NEXT: v_mov_b32_e32 v0, s0
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
; GFX9-NEXT: s_mul_i32 s2, s3, 0x50
; GFX9-NEXT: s_mul_hi_u32 s3, s3, 0x50
; GFX9-NEXT: v_mov_b32_e32 v0, s2
; GFX9-NEXT: v_mov_b32_e32 v1, s3
; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: s_mul_u64_zext_with_sregs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX10-NEXT: v_mov_b32_e32 v2, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_load_dword s1, s[6:7], 0x0
; GFX10-NEXT: s_load_dword s3, s[2:3], 0x0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_mul_i32 s0, s1, 0x50
; GFX10-NEXT: s_mul_hi_u32 s1, s1, 0x50
; GFX10-NEXT: v_mov_b32_e32 v0, s0
; GFX10-NEXT: v_mov_b32_e32 v1, s1
; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
; GFX10-NEXT: s_mul_i32 s2, s3, 0x50
; GFX10-NEXT: s_mul_hi_u32 s3, s3, 0x50
; GFX10-NEXT: v_mov_b32_e32 v0, s2
; GFX10-NEXT: v_mov_b32_e32 v1, s3
; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: s_mul_u64_zext_with_sregs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_load_b32 s3, s[2:3], 0x0
Expand All @@ -2619,7 +2619,7 @@ define amdgpu_kernel void @s_mul_u64_zext_with_sregs(ptr addrspace(1) %out, ptr
;
; GFX12-LABEL: s_mul_u64_zext_with_sregs:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: v_mov_b32_e32 v2, 0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_b32 s2, s[2:3], 0x0
Expand Down Expand Up @@ -2718,7 +2718,7 @@ define amdgpu_ps void @s_mul_u64_sext_with_vregs(ptr addrspace(1) %out, ptr addr
define amdgpu_kernel void @s_mul_u64_sext_with_sregs(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GFX7-LABEL: s_mul_u64_sext_with_sregs:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x9
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GFX7-NEXT: v_mov_b32_e32 v0, 0x50
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_load_dword s3, s[2:3], 0x0
Expand All @@ -2738,61 +2738,61 @@ define amdgpu_kernel void @s_mul_u64_sext_with_sregs(ptr addrspace(1) %out, ptr
;
; GFX8-LABEL: s_mul_u64_sext_with_sregs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: v_mov_b32_e32 v0, 0x50
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_load_dword s0, s[6:7], 0x0
; GFX8-NEXT: v_mov_b32_e32 v2, s4
; GFX8-NEXT: v_mov_b32_e32 v3, s5
; GFX8-NEXT: s_load_dword s2, s[2:3], 0x0
; GFX8-NEXT: v_mov_b32_e32 v3, s1
; GFX8-NEXT: v_mov_b32_e32 v2, s0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mul_hi_u32 v0, s0, v0
; GFX8-NEXT: s_ashr_i32 s1, s0, 31
; GFX8-NEXT: s_mulk_i32 s0, 0x50
; GFX8-NEXT: s_mulk_i32 s1, 0x50
; GFX8-NEXT: v_readfirstlane_b32 s2, v0
; GFX8-NEXT: s_add_u32 s1, s1, s2
; GFX8-NEXT: v_mov_b32_e32 v0, s0
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: v_mul_hi_u32 v0, s2, v0
; GFX8-NEXT: s_ashr_i32 s3, s2, 31
; GFX8-NEXT: s_mulk_i32 s2, 0x50
; GFX8-NEXT: s_mulk_i32 s3, 0x50
; GFX8-NEXT: v_readfirstlane_b32 s4, v0
; GFX8-NEXT: s_add_u32 s3, s3, s4
; GFX8-NEXT: v_mov_b32_e32 v0, s2
; GFX8-NEXT: v_mov_b32_e32 v1, s3
; GFX8-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GFX8-NEXT: s_endpgm
;
; GFX9-LABEL: s_mul_u64_sext_with_sregs:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_load_dword s1, s[6:7], 0x0
; GFX9-NEXT: s_load_dword s3, s[2:3], 0x0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_ashr_i32 s2, s1, 31
; GFX9-NEXT: s_mul_i32 s0, s1, 0x50
; GFX9-NEXT: s_mul_hi_u32 s1, s1, 0x50
; GFX9-NEXT: s_mulk_i32 s2, 0x50
; GFX9-NEXT: s_add_u32 s1, s2, s1
; GFX9-NEXT: v_mov_b32_e32 v0, s0
; GFX9-NEXT: v_mov_b32_e32 v1, s1
; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
; GFX9-NEXT: s_ashr_i32 s4, s3, 31
; GFX9-NEXT: s_mul_i32 s2, s3, 0x50
; GFX9-NEXT: s_mul_hi_u32 s3, s3, 0x50
; GFX9-NEXT: s_mulk_i32 s4, 0x50
; GFX9-NEXT: s_add_u32 s3, s4, s3
; GFX9-NEXT: v_mov_b32_e32 v0, s2
; GFX9-NEXT: v_mov_b32_e32 v1, s3
; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: s_mul_u64_sext_with_sregs:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX10-NEXT: v_mov_b32_e32 v2, 0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_load_dword s0, s[6:7], 0x0
; GFX10-NEXT: s_load_dword s2, s[2:3], 0x0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_ashr_i32 s1, s0, 31
; GFX10-NEXT: s_mul_hi_u32 s2, s0, 0x50
; GFX10-NEXT: s_mulk_i32 s1, 0x50
; GFX10-NEXT: s_mulk_i32 s0, 0x50
; GFX10-NEXT: s_add_i32 s1, s2, s1
; GFX10-NEXT: v_mov_b32_e32 v0, s0
; GFX10-NEXT: v_mov_b32_e32 v1, s1
; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5]
; GFX10-NEXT: s_ashr_i32 s3, s2, 31
; GFX10-NEXT: s_mul_hi_u32 s4, s2, 0x50
; GFX10-NEXT: s_mulk_i32 s3, 0x50
; GFX10-NEXT: s_mulk_i32 s2, 0x50
; GFX10-NEXT: s_add_i32 s3, s4, s3
; GFX10-NEXT: v_mov_b32_e32 v0, s2
; GFX10-NEXT: v_mov_b32_e32 v1, s3
; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: s_mul_u64_sext_with_sregs:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_load_b32 s2, s[2:3], 0x0
Expand All @@ -2810,7 +2810,7 @@ define amdgpu_kernel void @s_mul_u64_sext_with_sregs(ptr addrspace(1) %out, ptr
;
; GFX12-LABEL: s_mul_u64_sext_with_sregs:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_load_b128 s[0:3], s[2:3], 0x24
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: v_mov_b32_e32 v2, 0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_b32 s2, s[2:3], 0x0
Expand Down
18 changes: 9 additions & 9 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/non-entry-alloca.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,21 +13,21 @@
define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reached_align4(ptr addrspace(1) %out, i32 %arg.cond0, i32 %arg.cond1, i32 %in) {
; GCN-LABEL: kernel_non_entry_block_static_alloca_uniformly_reached_align4:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_dword s4, s[6:7], 0x8
; GCN-NEXT: s_add_u32 s0, s0, s15
; GCN-NEXT: s_load_dword s4, s[8:9], 0x8
; GCN-NEXT: s_add_u32 s0, s0, s17
; GCN-NEXT: s_addc_u32 s1, s1, 0
; GCN-NEXT: s_mov_b32 s33, 0
; GCN-NEXT: s_movk_i32 s32, 0x400
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_cmp_lg_u32 s4, 0
; GCN-NEXT: s_cbranch_scc1 .LBB0_3
; GCN-NEXT: ; %bb.1: ; %bb.0
; GCN-NEXT: s_load_dword s4, s[6:7], 0xc
; GCN-NEXT: s_load_dword s4, s[8:9], 0xc
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_cmp_lg_u32 s4, 0
; GCN-NEXT: s_cbranch_scc1 .LBB0_3
; GCN-NEXT: ; %bb.2: ; %bb.1
; GCN-NEXT: s_load_dword s5, s[6:7], 0x10
; GCN-NEXT: s_load_dword s5, s[8:9], 0x10
; GCN-NEXT: s_add_u32 s4, s32, 0x1000
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: v_mov_b32_e32 v2, s4
Expand All @@ -39,7 +39,7 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache
; GCN-NEXT: buffer_store_dword v3, v2, s[0:3], 0 offen offset:4
; GCN-NEXT: v_mov_b32_e32 v2, s4
; GCN-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
; GCN-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
; GCN-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_add_u32_e32 v0, v2, v0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
Expand Down Expand Up @@ -84,16 +84,16 @@ bb.2:
define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reached_align64(ptr addrspace(1) %out, i32 %arg.cond, i32 %in) {
; GCN-LABEL: kernel_non_entry_block_static_alloca_uniformly_reached_align64:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_dword s4, s[6:7], 0x8
; GCN-NEXT: s_add_u32 s0, s0, s15
; GCN-NEXT: s_load_dword s4, s[8:9], 0x8
; GCN-NEXT: s_add_u32 s0, s0, s17
; GCN-NEXT: s_addc_u32 s1, s1, 0
; GCN-NEXT: s_mov_b32 s33, 0
; GCN-NEXT: s_movk_i32 s32, 0x1000
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_cmp_lg_u32 s4, 0
; GCN-NEXT: s_cbranch_scc1 .LBB1_2
; GCN-NEXT: ; %bb.1: ; %bb.0
; GCN-NEXT: s_load_dword s4, s[6:7], 0xc
; GCN-NEXT: s_load_dword s4, s[8:9], 0xc
; GCN-NEXT: s_add_u32 s5, s32, 0x1000
; GCN-NEXT: s_and_b32 s5, s5, 0xfffff000
; GCN-NEXT: v_mov_b32_e32 v1, 0
Expand All @@ -106,7 +106,7 @@ define amdgpu_kernel void @kernel_non_entry_block_static_alloca_uniformly_reache
; GCN-NEXT: buffer_store_dword v3, v2, s[0:3], 0 offen offset:4
; GCN-NEXT: v_mov_b32_e32 v2, s4
; GCN-NEXT: buffer_load_dword v2, v2, s[0:3], 0 offen
; GCN-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x0
; GCN-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_add_u32_e32 v0, v2, v0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
Expand Down
1,592 changes: 796 additions & 796 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/sdivrem.ll

Large diffs are not rendered by default.

20 changes: 10 additions & 10 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,7 @@ define i64 @v_shl_i64_sext_i32_overflow(i32 %x) {
define amdgpu_kernel void @mulu24_shl64(ptr addrspace(1) nocapture %arg) {
; GFX7-LABEL: mulu24_shl64:
; GFX7: ; %bb.0: ; %bb
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX7-NEXT: v_and_b32_e32 v0, 6, v0
; GFX7-NEXT: v_mul_u32_u24_e32 v0, 7, v0
; GFX7-NEXT: v_mov_b32_e32 v1, 0
Expand All @@ -251,7 +251,7 @@ define amdgpu_kernel void @mulu24_shl64(ptr addrspace(1) nocapture %arg) {
;
; GFX8-LABEL: mulu24_shl64:
; GFX8: ; %bb.0: ; %bb
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX8-NEXT: v_and_b32_e32 v0, 6, v0
; GFX8-NEXT: v_mul_u32_u24_e32 v0, 7, v0
; GFX8-NEXT: v_mov_b32_e32 v1, 0
Expand All @@ -266,7 +266,7 @@ define amdgpu_kernel void @mulu24_shl64(ptr addrspace(1) nocapture %arg) {
;
; GFX9-LABEL: mulu24_shl64:
; GFX9: ; %bb.0: ; %bb
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX9-NEXT: v_and_b32_e32 v0, 6, v0
; GFX9-NEXT: v_mul_u32_u24_e32 v0, 7, v0
; GFX9-NEXT: v_mov_b32_e32 v1, 0
Expand All @@ -281,7 +281,7 @@ define amdgpu_kernel void @mulu24_shl64(ptr addrspace(1) nocapture %arg) {
;
; GFX10-LABEL: mulu24_shl64:
; GFX10: ; %bb.0: ; %bb
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0
; GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
; GFX10-NEXT: v_and_b32_e32 v0, 6, v0
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: v_mul_u32_u24_e32 v0, 7, v0
Expand All @@ -296,7 +296,7 @@ define amdgpu_kernel void @mulu24_shl64(ptr addrspace(1) nocapture %arg) {
;
; GFX11-LABEL: mulu24_shl64:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_load_b64 s[0:1], s[2:3], 0x0
; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 6, v0
; GFX11-NEXT: v_mul_u32_u24_e32 v0, 7, v0
; GFX11-NEXT: v_lshlrev_b64 v[2:3], 2, v[0:1]
Expand All @@ -321,7 +321,7 @@ bb:
define amdgpu_kernel void @muli24_shl64(ptr addrspace(1) nocapture %arg, ptr addrspace(1) nocapture readonly %arg1) {
; GFX7-LABEL: muli24_shl64:
; GFX7: ; %bb.0: ; %bb
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GFX7-NEXT: v_mov_b32_e32 v2, 0
; GFX7-NEXT: s_mov_b32 s6, 0
Expand All @@ -340,7 +340,7 @@ define amdgpu_kernel void @muli24_shl64(ptr addrspace(1) nocapture %arg, ptr add
;
; GFX8-LABEL: muli24_shl64:
; GFX8: ; %bb.0: ; %bb
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX8-NEXT: v_lshlrev_b32_e32 v3, 2, v0
; GFX8-NEXT: v_lshlrev_b32_e32 v5, 3, v0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
Expand All @@ -363,7 +363,7 @@ define amdgpu_kernel void @muli24_shl64(ptr addrspace(1) nocapture %arg, ptr add
;
; GFX9-LABEL: muli24_shl64:
; GFX9: ; %bb.0: ; %bb
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GFX9-NEXT: v_mov_b32_e32 v2, 0
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 3, v0
Expand All @@ -378,7 +378,7 @@ define amdgpu_kernel void @muli24_shl64(ptr addrspace(1) nocapture %arg, ptr add
;
; GFX10-LABEL: muli24_shl64:
; GFX10: ; %bb.0: ; %bb
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[2:3], 0x0
; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x0
; GFX10-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GFX10-NEXT: v_mov_b32_e32 v2, 0
; GFX10-NEXT: v_lshlrev_b32_e32 v0, 3, v0
Expand All @@ -393,7 +393,7 @@ define amdgpu_kernel void @muli24_shl64(ptr addrspace(1) nocapture %arg, ptr add
;
; GFX11-LABEL: muli24_shl64:
; GFX11: ; %bb.0: ; %bb
; GFX11-NEXT: s_load_b128 s[0:3], s[2:3], 0x0
; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v2, 0x3ff, v0
; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v2
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 3, v2
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/smrd.ll
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,8 @@ entry:
; GCN-LABEL: {{^}}smrd6:
; SICIVI: s_add_u32 s{{[0-9]}}, s{{[0-9]}}, -4
; SICIVI: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x0
; GFX9_10: s_add_u32 s0, s6, -4
; GFX9_10: s_addc_u32 s1, s7, -1
; GFX9_10: s_add_u32 s2, s2, -4
; GFX9_10: s_addc_u32 s3, s3, -1
; GFX9_10: s_load_dword s{{[0-9]}}, s[{{[0-9]:[0-9]}}], 0x0
define amdgpu_kernel void @smrd6(ptr addrspace(1) %out, ptr addrspace(4) %ptr) #0 {
entry:
Expand Down
488 changes: 244 additions & 244 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.128.ll

Large diffs are not rendered by default.

460 changes: 242 additions & 218 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/store-local.96.ll

Large diffs are not rendered by default.

837 changes: 419 additions & 418 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/udivrem.ll

Large diffs are not rendered by default.

250 changes: 125 additions & 125 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/vni8-across-blocks.ll

Large diffs are not rendered by default.

188 changes: 94 additions & 94 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/widen-i8-i16-scalar-loads.ll

Large diffs are not rendered by default.

791 changes: 396 additions & 395 deletions llvm/test/CodeGen/AMDGPU/add.ll

Large diffs are not rendered by default.

420 changes: 210 additions & 210 deletions llvm/test/CodeGen/AMDGPU/add.v2i16.ll

Large diffs are not rendered by default.

106 changes: 53 additions & 53 deletions llvm/test/CodeGen/AMDGPU/agpr-copy-no-free-registers.ll
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,7 @@ define void @no_free_vgprs_at_agpr_to_agpr_copy(float %v0, float %v1) #0 {
define amdgpu_kernel void @no_agpr_no_reserve(ptr addrspace(1) %arg) #0 {
; GFX908-LABEL: no_agpr_no_reserve:
; GFX908: ; %bb.0:
; GFX908-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
; GFX908-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX908-NEXT: v_lshlrev_b32_e32 v0, 7, v0
; GFX908-NEXT: s_waitcnt lgkmcnt(0)
; GFX908-NEXT: global_load_dwordx4 v[1:4], v0, s[0:1] offset:16
Expand Down Expand Up @@ -302,7 +302,7 @@ define amdgpu_kernel void @no_agpr_no_reserve(ptr addrspace(1) %arg) #0 {
;
; GFX90A-LABEL: no_agpr_no_reserve:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
; GFX90A-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GFX90A-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX90A-NEXT: v_lshlrev_b32_e32 v32, 7, v0
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
Expand Down Expand Up @@ -514,40 +514,40 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
; GFX908-LABEL: introduced_copy_to_sgpr:
; GFX908: ; %bb.0: ; %bb
; GFX908-NEXT: global_load_ushort v16, v[0:1], off glc
; GFX908-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
; GFX908-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x10
; GFX908-NEXT: s_load_dword s9, s[6:7], 0x18
; GFX908-NEXT: s_mov_b32 s8, 0
; GFX908-NEXT: s_mov_b32 s7, s8
; GFX908-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX908-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10
; GFX908-NEXT: s_load_dword s7, s[8:9], 0x18
; GFX908-NEXT: s_mov_b32 s6, 0
; GFX908-NEXT: s_mov_b32 s9, s6
; GFX908-NEXT: s_waitcnt lgkmcnt(0)
; GFX908-NEXT: v_cvt_f32_u32_e32 v0, s3
; GFX908-NEXT: s_sub_i32 s6, 0, s3
; GFX908-NEXT: v_cvt_f32_f16_e32 v17, s9
; GFX908-NEXT: s_sub_i32 s8, 0, s3
; GFX908-NEXT: v_cvt_f32_f16_e32 v17, s7
; GFX908-NEXT: v_mov_b32_e32 v19, 0
; GFX908-NEXT: v_rcp_iflag_f32_e32 v2, v0
; GFX908-NEXT: v_mov_b32_e32 v0, 0
; GFX908-NEXT: v_mov_b32_e32 v1, 0
; GFX908-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; GFX908-NEXT: v_cvt_u32_f32_e32 v2, v2
; GFX908-NEXT: v_readfirstlane_b32 s10, v2
; GFX908-NEXT: s_mul_i32 s6, s6, s10
; GFX908-NEXT: s_mul_hi_u32 s6, s10, s6
; GFX908-NEXT: s_add_i32 s10, s10, s6
; GFX908-NEXT: s_mul_hi_u32 s6, s2, s10
; GFX908-NEXT: s_mul_i32 s10, s6, s3
; GFX908-NEXT: s_mul_i32 s8, s8, s10
; GFX908-NEXT: s_mul_hi_u32 s8, s10, s8
; GFX908-NEXT: s_add_i32 s10, s10, s8
; GFX908-NEXT: s_mul_hi_u32 s8, s2, s10
; GFX908-NEXT: s_mul_i32 s10, s8, s3
; GFX908-NEXT: s_sub_i32 s2, s2, s10
; GFX908-NEXT: s_add_i32 s11, s6, 1
; GFX908-NEXT: s_add_i32 s11, s8, 1
; GFX908-NEXT: s_sub_i32 s10, s2, s3
; GFX908-NEXT: s_cmp_ge_u32 s2, s3
; GFX908-NEXT: s_cselect_b32 s6, s11, s6
; GFX908-NEXT: s_cselect_b32 s8, s11, s8
; GFX908-NEXT: s_cselect_b32 s2, s10, s2
; GFX908-NEXT: s_add_i32 s10, s6, 1
; GFX908-NEXT: s_add_i32 s10, s8, 1
; GFX908-NEXT: s_cmp_ge_u32 s2, s3
; GFX908-NEXT: s_cselect_b32 s6, s10, s6
; GFX908-NEXT: s_lshr_b32 s9, s9, 16
; GFX908-NEXT: s_lshl_b64 s[12:13], s[6:7], 5
; GFX908-NEXT: v_cvt_f32_f16_e32 v18, s9
; GFX908-NEXT: s_cselect_b32 s8, s10, s8
; GFX908-NEXT: s_lshr_b32 s7, s7, 16
; GFX908-NEXT: v_cvt_f32_f16_e32 v18, s7
; GFX908-NEXT: s_lshl_b64 s[2:3], s[0:1], 5
; GFX908-NEXT: s_lshl_b64 s[12:13], s[8:9], 5
; GFX908-NEXT: s_lshl_b64 s[10:11], s[4:5], 5
; GFX908-NEXT: s_or_b32 s10, s10, 28
; GFX908-NEXT: s_waitcnt vmcnt(0)
Expand All @@ -572,15 +572,15 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
; GFX908-NEXT: ; in Loop: Header=BB3_2 Depth=1
; GFX908-NEXT: global_load_dwordx2 v[2:3], v[0:1], off
; GFX908-NEXT: v_cmp_gt_i64_e64 s[0:1], s[4:5], -1
; GFX908-NEXT: s_mov_b32 s9, s8
; GFX908-NEXT: s_mov_b32 s7, s6
; GFX908-NEXT: v_cndmask_b32_e64 v6, 0, 1, s[0:1]
; GFX908-NEXT: v_mov_b32_e32 v4, s8
; GFX908-NEXT: v_mov_b32_e32 v4, s6
; GFX908-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v6
; GFX908-NEXT: v_mov_b32_e32 v8, s8
; GFX908-NEXT: v_mov_b32_e32 v6, s8
; GFX908-NEXT: v_mov_b32_e32 v5, s9
; GFX908-NEXT: v_mov_b32_e32 v9, s9
; GFX908-NEXT: v_mov_b32_e32 v7, s9
; GFX908-NEXT: v_mov_b32_e32 v9, s7
; GFX908-NEXT: v_mov_b32_e32 v6, s6
; GFX908-NEXT: v_mov_b32_e32 v5, s7
; GFX908-NEXT: v_mov_b32_e32 v8, s6
; GFX908-NEXT: v_mov_b32_e32 v7, s7
; GFX908-NEXT: v_cmp_lt_i64_e64 s[16:17], s[4:5], 0
; GFX908-NEXT: v_mov_b32_e32 v11, v5
; GFX908-NEXT: s_mov_b64 s[18:19], s[10:11]
Expand Down Expand Up @@ -670,7 +670,7 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
; GFX908-NEXT: s_cbranch_vccz .LBB3_1
; GFX908-NEXT: ; %bb.11: ; %bb12
; GFX908-NEXT: ; in Loop: Header=BB3_2 Depth=1
; GFX908-NEXT: s_add_u32 s4, s4, s6
; GFX908-NEXT: s_add_u32 s4, s4, s8
; GFX908-NEXT: s_addc_u32 s5, s5, 0
; GFX908-NEXT: s_add_u32 s10, s10, s12
; GFX908-NEXT: s_addc_u32 s11, s11, s13
Expand All @@ -682,39 +682,39 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
; GFX90A-LABEL: introduced_copy_to_sgpr:
; GFX90A: ; %bb.0: ; %bb
; GFX90A-NEXT: global_load_ushort v18, v[0:1], off glc
; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[6:7], 0x10
; GFX90A-NEXT: s_load_dword s9, s[6:7], 0x18
; GFX90A-NEXT: s_mov_b32 s8, 0
; GFX90A-NEXT: s_mov_b32 s7, s8
; GFX90A-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX90A-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x10
; GFX90A-NEXT: s_load_dword s7, s[8:9], 0x18
; GFX90A-NEXT: s_mov_b32 s6, 0
; GFX90A-NEXT: s_mov_b32 s9, s6
; GFX90A-NEXT: s_waitcnt lgkmcnt(0)
; GFX90A-NEXT: v_cvt_f32_u32_e32 v0, s3
; GFX90A-NEXT: s_sub_i32 s6, 0, s3
; GFX90A-NEXT: s_sub_i32 s8, 0, s3
; GFX90A-NEXT: v_mov_b32_e32 v19, 0
; GFX90A-NEXT: v_rcp_iflag_f32_e32 v2, v0
; GFX90A-NEXT: v_pk_mov_b32 v[0:1], 0, 0
; GFX90A-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2
; GFX90A-NEXT: v_cvt_u32_f32_e32 v3, v2
; GFX90A-NEXT: v_cvt_f32_f16_e32 v2, s9
; GFX90A-NEXT: v_cvt_f32_f16_e32 v2, s7
; GFX90A-NEXT: v_readfirstlane_b32 s10, v3
; GFX90A-NEXT: s_mul_i32 s6, s6, s10
; GFX90A-NEXT: s_mul_hi_u32 s6, s10, s6
; GFX90A-NEXT: s_add_i32 s10, s10, s6
; GFX90A-NEXT: s_mul_hi_u32 s6, s2, s10
; GFX90A-NEXT: s_mul_i32 s10, s6, s3
; GFX90A-NEXT: s_mul_i32 s8, s8, s10
; GFX90A-NEXT: s_mul_hi_u32 s8, s10, s8
; GFX90A-NEXT: s_add_i32 s10, s10, s8
; GFX90A-NEXT: s_mul_hi_u32 s8, s2, s10
; GFX90A-NEXT: s_mul_i32 s10, s8, s3
; GFX90A-NEXT: s_sub_i32 s2, s2, s10
; GFX90A-NEXT: s_add_i32 s11, s6, 1
; GFX90A-NEXT: s_add_i32 s11, s8, 1
; GFX90A-NEXT: s_sub_i32 s10, s2, s3
; GFX90A-NEXT: s_cmp_ge_u32 s2, s3
; GFX90A-NEXT: s_cselect_b32 s6, s11, s6
; GFX90A-NEXT: s_cselect_b32 s8, s11, s8
; GFX90A-NEXT: s_cselect_b32 s2, s10, s2
; GFX90A-NEXT: s_add_i32 s10, s6, 1
; GFX90A-NEXT: s_add_i32 s10, s8, 1
; GFX90A-NEXT: s_cmp_ge_u32 s2, s3
; GFX90A-NEXT: s_cselect_b32 s6, s10, s6
; GFX90A-NEXT: s_lshr_b32 s9, s9, 16
; GFX90A-NEXT: s_lshl_b64 s[12:13], s[6:7], 5
; GFX90A-NEXT: v_cvt_f32_f16_e32 v3, s9
; GFX90A-NEXT: s_cselect_b32 s8, s10, s8
; GFX90A-NEXT: s_lshr_b32 s7, s7, 16
; GFX90A-NEXT: v_cvt_f32_f16_e32 v3, s7
; GFX90A-NEXT: s_lshl_b64 s[2:3], s[0:1], 5
; GFX90A-NEXT: s_lshl_b64 s[12:13], s[8:9], 5
; GFX90A-NEXT: s_lshl_b64 s[10:11], s[4:5], 5
; GFX90A-NEXT: s_or_b32 s10, s10, 28
; GFX90A-NEXT: s_waitcnt vmcnt(0)
Expand All @@ -739,12 +739,12 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
; GFX90A-NEXT: ; in Loop: Header=BB3_2 Depth=1
; GFX90A-NEXT: global_load_dwordx2 v[4:5], v[0:1], off
; GFX90A-NEXT: v_cmp_gt_i64_e64 s[0:1], s[4:5], -1
; GFX90A-NEXT: s_mov_b32 s9, s8
; GFX90A-NEXT: s_mov_b32 s7, s6
; GFX90A-NEXT: v_cndmask_b32_e64 v8, 0, 1, s[0:1]
; GFX90A-NEXT: v_pk_mov_b32 v[6:7], s[8:9], s[8:9] op_sel:[0,1]
; GFX90A-NEXT: v_pk_mov_b32 v[6:7], s[6:7], s[6:7] op_sel:[0,1]
; GFX90A-NEXT: v_cmp_ne_u32_e64 s[0:1], 1, v8
; GFX90A-NEXT: v_pk_mov_b32 v[10:11], s[8:9], s[8:9] op_sel:[0,1]
; GFX90A-NEXT: v_pk_mov_b32 v[8:9], s[8:9], s[8:9] op_sel:[0,1]
; GFX90A-NEXT: v_pk_mov_b32 v[10:11], s[6:7], s[6:7] op_sel:[0,1]
; GFX90A-NEXT: v_pk_mov_b32 v[8:9], s[6:7], s[6:7] op_sel:[0,1]
; GFX90A-NEXT: v_cmp_lt_i64_e64 s[16:17], s[4:5], 0
; GFX90A-NEXT: s_mov_b64 s[18:19], s[10:11]
; GFX90A-NEXT: v_pk_mov_b32 v[12:13], v[6:7], v[6:7] op_sel:[0,1]
Expand Down Expand Up @@ -826,7 +826,7 @@ define amdgpu_kernel void @introduced_copy_to_sgpr(i64 %arg, i32 %arg1, i32 %arg
; GFX90A-NEXT: s_cbranch_vccz .LBB3_1
; GFX90A-NEXT: ; %bb.11: ; %bb12
; GFX90A-NEXT: ; in Loop: Header=BB3_2 Depth=1
; GFX90A-NEXT: s_add_u32 s4, s4, s6
; GFX90A-NEXT: s_add_u32 s4, s4, s8
; GFX90A-NEXT: s_addc_u32 s5, s5, 0
; GFX90A-NEXT: s_add_u32 s10, s10, s12
; GFX90A-NEXT: s_addc_u32 s11, s11, s13
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/always-uniform.ll
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ declare i32 @llvm.amdgcn.readfirstlane(i32)
define amdgpu_kernel void @readfirstlane_uniform(ptr addrspace(1) noalias nocapture readonly, ptr addrspace(1) noalias nocapture readonly) {
; GCN-LABEL: readfirstlane_uniform:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
; GCN-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
; GCN-NEXT: v_readfirstlane_b32 s4, v0
; GCN-NEXT: s_mov_b32 s5, 0
; GCN-NEXT: s_lshl_b64 s[4:5], s[4:5], 2
Expand Down
20 changes: 10 additions & 10 deletions llvm/test/CodeGen/AMDGPU/amd.endpgm.ll
Original file line number Diff line number Diff line change
Expand Up @@ -41,46 +41,46 @@ define void @test1() {
define amdgpu_kernel void @test2(ptr %p, i32 %x) {
; GFX9-LABEL: test2:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_load_dword s0, s[2:3], 0x2c
; GFX9-NEXT: s_load_dword s0, s[4:5], 0x2c
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_cmp_lt_i32 s0, 1
; GFX9-NEXT: s_cbranch_scc0 .LBB2_2
; GFX9-NEXT: ; %bb.1: ; %else
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x24
; GFX9-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24
; GFX9-NEXT: v_mov_b32_e32 v2, s0
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: v_mov_b32_e32 v1, s5
; GFX9-NEXT: v_mov_b32_e32 v0, s2
; GFX9-NEXT: v_mov_b32_e32 v1, s3
; GFX9-NEXT: flat_store_dword v[0:1], v2
; GFX9-NEXT: s_endpgm
; GFX9-NEXT: .LBB2_2: ; %then
; GFX9-NEXT: s_endpgm
;
; GFX10-LABEL: test2:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_load_dword s0, s[2:3], 0x2c
; GFX10-NEXT: s_load_dword s0, s[4:5], 0x2c
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_cmp_lt_i32 s0, 1
; GFX10-NEXT: s_cbranch_scc0 .LBB2_2
; GFX10-NEXT: ; %bb.1: ; %else
; GFX10-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x24
; GFX10-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24
; GFX10-NEXT: v_mov_b32_e32 v2, s0
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, s4
; GFX10-NEXT: v_mov_b32_e32 v1, s5
; GFX10-NEXT: v_mov_b32_e32 v0, s2
; GFX10-NEXT: v_mov_b32_e32 v1, s3
; GFX10-NEXT: flat_store_dword v[0:1], v2
; GFX10-NEXT: s_endpgm
; GFX10-NEXT: .LBB2_2: ; %then
; GFX10-NEXT: s_endpgm
;
; GFX11-LABEL: test2:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_load_b32 s0, s[2:3], 0x2c
; GFX11-NEXT: s_load_b32 s0, s[4:5], 0x2c
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_cmp_lt_i32 s0, 1
; GFX11-NEXT: s_cbranch_scc0 .LBB2_2
; GFX11-NEXT: ; %bb.1: ; %else
; GFX11-NEXT: s_load_b64 s[2:3], s[2:3], 0x24
; GFX11-NEXT: s_load_b64 s[2:3], s[4:5], 0x24
; GFX11-NEXT: v_mov_b32_e32 v2, s0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,7 @@ define amdgpu_kernel void @select_add_lhs_const_i16(i1 %cond) {
;
; GCN-LABEL: select_add_lhs_const_i16:
; GCN: ; %bb.0:
; GCN-NEXT: s_load_dword s0, s[6:7], 0x0
; GCN-NEXT: s_load_dword s0, s[8:9], 0x0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_bitcmp1_b32 s0, 0
; GCN-NEXT: s_movk_i32 s0, 0x80
Expand Down
5,705 changes: 2,850 additions & 2,855 deletions llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll

Large diffs are not rendered by default.

258 changes: 129 additions & 129 deletions llvm/test/CodeGen/AMDGPU/amdgpu-demote-scc-branches.ll

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/amdgpu-mul24-knownbits.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ define weak_odr amdgpu_kernel void @test_mul24_knownbits_kernel(ptr addrspace(1)
; GCN-LABEL: test_mul24_knownbits_kernel:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: v_and_b32_e32 v0, 3, v0
; GCN-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
; GCN-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
; GCN-NEXT: v_mul_i32_i24_e32 v0, -5, v0
; GCN-NEXT: v_and_b32_e32 v0, 0xffffffe0, v0
; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0
Expand Down
60 changes: 30 additions & 30 deletions llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll
Original file line number Diff line number Diff line change
Expand Up @@ -146,14 +146,14 @@ define double @test_pow_fast_f64__integral_y(double %x, i32 %y.i) {
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: v_writelane_b32 v43, s45, 13
; CHECK-NEXT: v_mov_b32_e32 v40, v31
; CHECK-NEXT: s_mov_b64 s[34:35], s[6:7]
; CHECK-NEXT: v_mov_b32_e32 v41, v2
; CHECK-NEXT: s_mov_b32 s42, s15
; CHECK-NEXT: s_mov_b32 s43, s14
; CHECK-NEXT: s_mov_b32 s44, s13
; CHECK-NEXT: s_mov_b32 s45, s12
; CHECK-NEXT: s_mov_b64 s[36:37], s[10:11]
; CHECK-NEXT: s_mov_b64 s[38:39], s[8:9]
; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_cvt_f64_i32_e32 v[2:3], v41
Expand All @@ -163,9 +163,9 @@ define double @test_pow_fast_f64__integral_y(double %x, i32 %y.i) {
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
; CHECK-NEXT: s_mov_b64 s[6:7], s[34:35]
; CHECK-NEXT: s_mov_b64 s[8:9], s[38:39]
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
; CHECK-NEXT: s_mov_b32 s12, s45
; CHECK-NEXT: s_mov_b32 s13, s44
; CHECK-NEXT: s_mov_b32 s14, s43
Expand Down Expand Up @@ -285,15 +285,15 @@ define double @test_powr_fast_f64(double %x, double %y) {
; CHECK-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
; CHECK-NEXT: v_writelane_b32 v43, s45, 13
; CHECK-NEXT: v_mov_b32_e32 v42, v31
; CHECK-NEXT: s_mov_b64 s[34:35], s[6:7]
; CHECK-NEXT: v_mov_b32_e32 v41, v3
; CHECK-NEXT: v_mov_b32_e32 v40, v2
; CHECK-NEXT: s_mov_b32 s42, s15
; CHECK-NEXT: s_mov_b32 s43, s14
; CHECK-NEXT: s_mov_b32 s44, s13
; CHECK-NEXT: s_mov_b32 s45, s12
; CHECK-NEXT: s_mov_b64 s[36:37], s[10:11]
; CHECK-NEXT: s_mov_b64 s[38:39], s[8:9]
; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_mul_f64 v[0:1], v[40:41], v[0:1]
Expand All @@ -302,9 +302,9 @@ define double @test_powr_fast_f64(double %x, double %y) {
; CHECK-NEXT: s_addc_u32 s5, s5, _Z4exp2d@gotpcrel32@hi+12
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: s_mov_b64 s[6:7], s[34:35]
; CHECK-NEXT: s_mov_b64 s[8:9], s[38:39]
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
; CHECK-NEXT: s_mov_b32 s12, s45
; CHECK-NEXT: s_mov_b32 s13, s44
; CHECK-NEXT: s_mov_b32 s14, s43
Expand Down Expand Up @@ -430,14 +430,14 @@ define double @test_pown_fast_f64(double %x, i32 %y) {
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: v_writelane_b32 v43, s45, 13
; CHECK-NEXT: v_mov_b32_e32 v40, v31
; CHECK-NEXT: s_mov_b64 s[34:35], s[6:7]
; CHECK-NEXT: v_mov_b32_e32 v41, v2
; CHECK-NEXT: s_mov_b32 s42, s15
; CHECK-NEXT: s_mov_b32 s43, s14
; CHECK-NEXT: s_mov_b32 s44, s13
; CHECK-NEXT: s_mov_b32 s45, s12
; CHECK-NEXT: s_mov_b64 s[36:37], s[10:11]
; CHECK-NEXT: s_mov_b64 s[38:39], s[8:9]
; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: v_cvt_f64_i32_e32 v[2:3], v41
Expand All @@ -447,9 +447,9 @@ define double @test_pown_fast_f64(double %x, i32 %y) {
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
; CHECK-NEXT: s_mov_b64 s[6:7], s[34:35]
; CHECK-NEXT: s_mov_b64 s[8:9], s[38:39]
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
; CHECK-NEXT: s_mov_b32 s12, s45
; CHECK-NEXT: s_mov_b32 s13, s44
; CHECK-NEXT: s_mov_b32 s14, s43
Expand Down Expand Up @@ -571,13 +571,13 @@ define double @test_pown_fast_f64_known_even(double %x, i32 %y.arg) {
; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
; CHECK-NEXT: v_writelane_b32 v42, s45, 13
; CHECK-NEXT: v_mov_b32_e32 v40, v31
; CHECK-NEXT: s_mov_b64 s[34:35], s[6:7]
; CHECK-NEXT: s_mov_b32 s42, s15
; CHECK-NEXT: s_mov_b32 s43, s14
; CHECK-NEXT: s_mov_b32 s44, s13
; CHECK-NEXT: s_mov_b32 s45, s12
; CHECK-NEXT: s_mov_b64 s[36:37], s[10:11]
; CHECK-NEXT: s_mov_b64 s[38:39], s[8:9]
; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
; CHECK-NEXT: v_lshlrev_b32_e32 v41, 1, v2
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
Expand All @@ -588,9 +588,9 @@ define double @test_pown_fast_f64_known_even(double %x, i32 %y.arg) {
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
; CHECK-NEXT: s_mov_b64 s[6:7], s[34:35]
; CHECK-NEXT: s_mov_b64 s[8:9], s[38:39]
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
; CHECK-NEXT: s_mov_b32 s12, s45
; CHECK-NEXT: s_mov_b32 s13, s44
; CHECK-NEXT: s_mov_b32 s14, s43
Expand Down Expand Up @@ -715,13 +715,13 @@ define double @test_pown_fast_f64_known_odd(double %x, i32 %y.arg) {
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: v_writelane_b32 v43, s45, 13
; CHECK-NEXT: v_mov_b32_e32 v40, v31
; CHECK-NEXT: s_mov_b64 s[34:35], s[6:7]
; CHECK-NEXT: s_mov_b32 s42, s15
; CHECK-NEXT: s_mov_b32 s43, s14
; CHECK-NEXT: s_mov_b32 s44, s13
; CHECK-NEXT: s_mov_b32 s45, s12
; CHECK-NEXT: s_mov_b64 s[36:37], s[10:11]
; CHECK-NEXT: s_mov_b64 s[38:39], s[8:9]
; CHECK-NEXT: s_mov_b64 s[34:35], s[10:11]
; CHECK-NEXT: s_mov_b64 s[36:37], s[8:9]
; CHECK-NEXT: s_mov_b64 s[38:39], s[6:7]
; CHECK-NEXT: v_or_b32_e32 v42, 1, v2
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
Expand All @@ -732,9 +732,9 @@ define double @test_pown_fast_f64_known_odd(double %x, i32 %y.arg) {
; CHECK-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x0
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
; CHECK-NEXT: s_mov_b64 s[6:7], s[34:35]
; CHECK-NEXT: s_mov_b64 s[8:9], s[38:39]
; CHECK-NEXT: s_mov_b64 s[10:11], s[36:37]
; CHECK-NEXT: s_mov_b64 s[6:7], s[38:39]
; CHECK-NEXT: s_mov_b64 s[8:9], s[36:37]
; CHECK-NEXT: s_mov_b64 s[10:11], s[34:35]
; CHECK-NEXT: s_mov_b32 s12, s45
; CHECK-NEXT: s_mov_b32 s13, s44
; CHECK-NEXT: s_mov_b32 s14, s43
Expand Down
36 changes: 18 additions & 18 deletions llvm/test/CodeGen/AMDGPU/amdgpu.work-item-intrinsics.deprecated.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
; Legacy intrinsics that just read implicit parameters

; FUNC-LABEL: {{^}}ngroups_x:
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[2:3], 0x0
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[2:3], 0x0
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[4:5], 0x0
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[4:5], 0x0
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]

Expand All @@ -24,8 +24,8 @@ entry:
}

; FUNC-LABEL: {{^}}ngroups_y:
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[2:3], 0x1
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[2:3], 0x4
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[4:5], 0x1
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[4:5], 0x4
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]

Expand All @@ -39,8 +39,8 @@ entry:
}

; FUNC-LABEL: {{^}}ngroups_z:
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[2:3], 0x2
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[2:3], 0x8
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[4:5], 0x2
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[4:5], 0x8
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]

Expand All @@ -54,8 +54,8 @@ entry:
}

; FUNC-LABEL: {{^}}global_size_x:
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[2:3], 0x3
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[2:3], 0xc
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[4:5], 0x3
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[4:5], 0xc
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]

Expand All @@ -69,8 +69,8 @@ entry:
}

; FUNC-LABEL: {{^}}global_size_y:
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[2:3], 0x4
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[2:3], 0x10
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[4:5], 0x4
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[4:5], 0x10
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]

Expand All @@ -84,8 +84,8 @@ entry:
}

; FUNC-LABEL: {{^}}global_size_z:
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[2:3], 0x5
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[2:3], 0x14
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[4:5], 0x5
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[4:5], 0x14
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]

Expand All @@ -99,8 +99,8 @@ entry:
}

; FUNC-LABEL: {{^}}local_size_x:
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[2:3], 0x6
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[2:3], 0x18
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[4:5], 0x6
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[4:5], 0x18
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]

Expand All @@ -114,8 +114,8 @@ entry:
}

; FUNC-LABEL: {{^}}local_size_y:
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[2:3], 0x7
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[2:3], 0x1c
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[4:5], 0x7
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[4:5], 0x1c
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]

Expand All @@ -129,8 +129,8 @@ entry:
}

; FUNC-LABEL: {{^}}local_size_z:
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[2:3], 0x8
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[2:3], 0x20
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[4:5], 0x8
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[4:5], 0x20
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]

Expand Down
10 changes: 5 additions & 5 deletions llvm/test/CodeGen/AMDGPU/amdhsa-kernarg-preload-num-sgprs.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@
; OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000 ................
; OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000 ................
; OBJDUMP-NOT: 0030 0000af00 94130000 1a000400 00000000 ................
; OBJDUMP-NEXT: 0030 4000af00 94130000 1a000400 00000000 @...............
; OBJDUMP-NEXT: 0030 8000af00 98130000 1a000400 00000000 ................

; ASM-LABEL: amdhsa_kernarg_preload_4_implicit_6:
; ASM: .amdhsa_user_sgpr_count 10
; ASM: .amdhsa_next_free_sgpr 10
; ASM: ; TotalNumSgprs: 16
; ASM: ; NumSGPRsForWavesPerEU: 16
; ASM: .amdhsa_user_sgpr_count 12
; ASM: .amdhsa_next_free_sgpr 12
; ASM: ; TotalNumSgprs: 18
; ASM: ; NumSGPRsForWavesPerEU: 18

; Test that we include preloaded SGPRs in the GRANULATED_WAVEFRONT_SGPR_COUNT
; feild that are not explicitly referenced in the kernel. This test has 6 implicit
Expand Down
Binary file not shown.
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/amdpal-elf.ll
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
; ELF: Section: .text (0x2)
; ELF: }

; GFX10: NumSGPRsForWavesPerEU: 4
; GFX10: NumSGPRsForWavesPerEU: 6
; GFX10: NumVGPRsForWavesPerEU: 1

define amdgpu_kernel void @simple(ptr addrspace(1) %out) {
Expand Down
36 changes: 18 additions & 18 deletions llvm/test/CodeGen/AMDGPU/andorbitset.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@
define amdgpu_kernel void @s_clear_msb(ptr addrspace(1) %out, i32 %in) {
; SI-LABEL: s_clear_msb:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[2:3], 0xb
; SI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x9
; SI-NEXT: s_load_dword s6, s[4:5], 0xb
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_bitset0_b32 s4, 31
; SI-NEXT: s_and_b32 s4, s6, 0x7fffffff
; SI-NEXT: v_mov_b32_e32 v0, s4
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
Expand All @@ -21,12 +21,12 @@ define amdgpu_kernel void @s_clear_msb(ptr addrspace(1) %out, i32 %in) {
define amdgpu_kernel void @s_set_msb(ptr addrspace(1) %out, i32 %in) {
; SI-LABEL: s_set_msb:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[2:3], 0xb
; SI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x9
; SI-NEXT: s_load_dword s6, s[4:5], 0xb
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_bitset1_b32 s4, 31
; SI-NEXT: s_or_b32 s4, s6, 0x80000000
; SI-NEXT: v_mov_b32_e32 v0, s4
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
Expand All @@ -38,12 +38,12 @@ define amdgpu_kernel void @s_set_msb(ptr addrspace(1) %out, i32 %in) {
define amdgpu_kernel void @s_clear_lsb(ptr addrspace(1) %out, i32 %in) {
; SI-LABEL: s_clear_lsb:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[2:3], 0xb
; SI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x9
; SI-NEXT: s_load_dword s6, s[4:5], 0xb
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_and_b32 s4, s4, -2
; SI-NEXT: s_and_b32 s4, s6, -2
; SI-NEXT: v_mov_b32_e32 v0, s4
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
Expand All @@ -55,12 +55,12 @@ define amdgpu_kernel void @s_clear_lsb(ptr addrspace(1) %out, i32 %in) {
define amdgpu_kernel void @s_set_lsb(ptr addrspace(1) %out, i32 %in) {
; SI-LABEL: s_set_lsb:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[2:3], 0xb
; SI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x9
; SI-NEXT: s_load_dword s6, s[4:5], 0xb
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_or_b32 s4, s4, 1
; SI-NEXT: s_or_b32 s4, s6, 1
; SI-NEXT: v_mov_b32_e32 v0, s4
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
Expand All @@ -72,12 +72,12 @@ define amdgpu_kernel void @s_set_lsb(ptr addrspace(1) %out, i32 %in) {
define amdgpu_kernel void @s_clear_midbit(ptr addrspace(1) %out, i32 %in) {
; SI-LABEL: s_clear_midbit:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[2:3], 0xb
; SI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x9
; SI-NEXT: s_load_dword s6, s[4:5], 0xb
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_bitset0_b32 s4, 8
; SI-NEXT: s_and_b32 s4, s6, 0xfffffeff
; SI-NEXT: v_mov_b32_e32 v0, s4
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
Expand All @@ -89,12 +89,12 @@ define amdgpu_kernel void @s_clear_midbit(ptr addrspace(1) %out, i32 %in) {
define amdgpu_kernel void @s_set_midbit(ptr addrspace(1) %out, i32 %in) {
; SI-LABEL: s_set_midbit:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[2:3], 0xb
; SI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x9
; SI-NEXT: s_load_dword s6, s[4:5], 0xb
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_bitset1_b32 s4, 8
; SI-NEXT: s_or_b32 s4, s6, 0x100
; SI-NEXT: v_mov_b32_e32 v0, s4
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
Expand Down
36 changes: 18 additions & 18 deletions llvm/test/CodeGen/AMDGPU/andorxorinvimm.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@
define amdgpu_kernel void @s_or_to_orn2(ptr addrspace(1) %out, i32 %in) {
; SI-LABEL: s_or_to_orn2:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[2:3], 0xb
; SI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x9
; SI-NEXT: s_load_dword s6, s[4:5], 0xb
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_orn2_b32 s4, s4, 50
; SI-NEXT: s_or_b32 s4, s6, 0xffffffcd
; SI-NEXT: v_mov_b32_e32 v0, s4
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
Expand All @@ -21,12 +21,12 @@ define amdgpu_kernel void @s_or_to_orn2(ptr addrspace(1) %out, i32 %in) {
define amdgpu_kernel void @s_or_to_orn2_imm0(ptr addrspace(1) %out, i32 %in) {
; SI-LABEL: s_or_to_orn2_imm0:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[2:3], 0xb
; SI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x9
; SI-NEXT: s_load_dword s6, s[4:5], 0xb
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_orn2_b32 s4, s4, 50
; SI-NEXT: s_or_b32 s4, s6, 0xffffffcd
; SI-NEXT: v_mov_b32_e32 v0, s4
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
Expand All @@ -38,12 +38,12 @@ define amdgpu_kernel void @s_or_to_orn2_imm0(ptr addrspace(1) %out, i32 %in) {
define amdgpu_kernel void @s_and_to_andn2(ptr addrspace(1) %out, i32 %in) {
; SI-LABEL: s_and_to_andn2:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[2:3], 0xb
; SI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x9
; SI-NEXT: s_load_dword s6, s[4:5], 0xb
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_andn2_b32 s4, s4, 50
; SI-NEXT: s_and_b32 s4, s6, 0xffffffcd
; SI-NEXT: v_mov_b32_e32 v0, s4
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
Expand All @@ -55,12 +55,12 @@ define amdgpu_kernel void @s_and_to_andn2(ptr addrspace(1) %out, i32 %in) {
define amdgpu_kernel void @s_and_to_andn2_imm0(ptr addrspace(1) %out, i32 %in) {
; SI-LABEL: s_and_to_andn2_imm0:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[2:3], 0xb
; SI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x9
; SI-NEXT: s_load_dword s6, s[4:5], 0xb
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_andn2_b32 s4, s4, 50
; SI-NEXT: s_and_b32 s4, s6, 0xffffffcd
; SI-NEXT: v_mov_b32_e32 v0, s4
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
Expand All @@ -72,12 +72,12 @@ define amdgpu_kernel void @s_and_to_andn2_imm0(ptr addrspace(1) %out, i32 %in) {
define amdgpu_kernel void @s_xor_to_xnor(ptr addrspace(1) %out, i32 %in) {
; SI-LABEL: s_xor_to_xnor:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[2:3], 0xb
; SI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x9
; SI-NEXT: s_load_dword s6, s[4:5], 0xb
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_xnor_b32 s4, s4, 50
; SI-NEXT: s_xor_b32 s4, s6, 0xffffffcd
; SI-NEXT: v_mov_b32_e32 v0, s4
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
Expand All @@ -89,12 +89,12 @@ define amdgpu_kernel void @s_xor_to_xnor(ptr addrspace(1) %out, i32 %in) {
define amdgpu_kernel void @s_xor_to_xnor_imm0(ptr addrspace(1) %out, i32 %in) {
; SI-LABEL: s_xor_to_xnor_imm0:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[2:3], 0xb
; SI-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x9
; SI-NEXT: s_load_dword s6, s[4:5], 0xb
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_xnor_b32 s4, s4, 50
; SI-NEXT: s_xor_b32 s4, s6, 0xffffffcd
; SI-NEXT: v_mov_b32_e32 v0, s4
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
Expand Down
64 changes: 32 additions & 32 deletions llvm/test/CodeGen/AMDGPU/anyext.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,25 +9,25 @@ declare i32 @llvm.amdgcn.workitem.id.y() nounwind readnone
define amdgpu_kernel void @anyext_i1_i32(ptr addrspace(1) %out, i32 %cond) #0 {
; GCN-LABEL: anyext_i1_i32:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_dword s4, s[2:3], 0xb
; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x9
; GCN-NEXT: s_load_dword s6, s[4:5], 0xb
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_cmp_lg_u32 s4, 0
; GCN-NEXT: s_cmp_lg_u32 s6, 0
; GCN-NEXT: s_cselect_b64 s[4:5], -1, 0
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GCN-NEXT: s_endpgm
;
; GFX8-LABEL: anyext_i1_i32:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_load_dword s4, s[2:3], 0x2c
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GFX8-NEXT: s_load_dword s6, s[4:5], 0x2c
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8-NEXT: s_mov_b32 s3, 0xf000
; GFX8-NEXT: s_mov_b32 s2, -1
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: s_cmp_eq_u32 s4, 0
; GFX8-NEXT: s_cmp_eq_u32 s6, 0
; GFX8-NEXT: s_cselect_b64 s[4:5], -1, 0
; GFX8-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX8-NEXT: v_not_b32_e32 v0, v0
Expand All @@ -37,12 +37,12 @@ define amdgpu_kernel void @anyext_i1_i32(ptr addrspace(1) %out, i32 %cond) #0 {
;
; GFX9-LABEL: anyext_i1_i32:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_load_dword s4, s[2:3], 0x2c
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
; GFX9-NEXT: s_load_dword s6, s[4:5], 0x2c
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_cmp_eq_u32 s4, 0
; GFX9-NEXT: s_cmp_eq_u32 s6, 0
; GFX9-NEXT: s_cselect_b64 s[4:5], -1, 0
; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX9-NEXT: v_not_b32_e32 v0, v0
Expand All @@ -62,23 +62,23 @@ entry:
define amdgpu_kernel void @s_anyext_i16_i32(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b) #0 {
; GCN-LABEL: s_anyext_i16_i32:
; GCN: ; %bb.0: ; %entry
; GCN-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x9
; GCN-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0xd
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd
; GCN-NEXT: s_mov_b32 s11, 0xf000
; GCN-NEXT: s_mov_b32 s14, 0
; GCN-NEXT: s_mov_b32 s15, s11
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_mov_b64 s[12:13], s[6:7]
; GCN-NEXT: s_mov_b64 s[12:13], s[2:3]
; GCN-NEXT: v_lshlrev_b32_e32 v2, 1, v0
; GCN-NEXT: v_mov_b32_e32 v3, 0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 1, v1
; GCN-NEXT: s_mov_b64 s[2:3], s[14:15]
; GCN-NEXT: s_mov_b64 s[6:7], s[14:15]
; GCN-NEXT: v_mov_b32_e32 v1, v3
; GCN-NEXT: buffer_load_ushort v2, v[2:3], s[12:15], 0 addr64
; GCN-NEXT: buffer_load_ushort v0, v[0:1], s[0:3], 0 addr64
; GCN-NEXT: buffer_load_ushort v0, v[0:1], s[4:7], 0 addr64
; GCN-NEXT: s_mov_b32 s10, -1
; GCN-NEXT: s_mov_b32 s8, s4
; GCN-NEXT: s_mov_b32 s9, s5
; GCN-NEXT: s_mov_b32 s8, s0
; GCN-NEXT: s_mov_b32 s9, s1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_add_i32_e32 v0, vcc, v2, v0
; GCN-NEXT: v_not_b32_e32 v0, v0
Expand All @@ -88,46 +88,46 @@ define amdgpu_kernel void @s_anyext_i16_i32(ptr addrspace(1) %out, ptr addrspace
;
; GFX8-LABEL: s_anyext_i16_i32:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX8-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX8-NEXT: s_waitcnt lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v3, s7
; GFX8-NEXT: v_add_u32_e32 v2, vcc, s6, v0
; GFX8-NEXT: v_mov_b32_e32 v3, s3
; GFX8-NEXT: v_add_u32_e32 v2, vcc, s2, v0
; GFX8-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
; GFX8-NEXT: v_lshlrev_b32_e32 v0, 1, v1
; GFX8-NEXT: v_mov_b32_e32 v1, s1
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s0, v0
; GFX8-NEXT: v_mov_b32_e32 v1, s5
; GFX8-NEXT: v_add_u32_e32 v0, vcc, s4, v0
; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT: flat_load_ushort v2, v[2:3]
; GFX8-NEXT: flat_load_ushort v0, v[0:1]
; GFX8-NEXT: s_mov_b32 s7, 0xf000
; GFX8-NEXT: s_mov_b32 s6, -1
; GFX8-NEXT: s_mov_b32 s3, 0xf000
; GFX8-NEXT: s_mov_b32 s2, -1
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: v_add_u16_e32 v0, v2, v0
; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX8-NEXT: v_and_b32_e32 v0, 1, v0
; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX8-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX8-NEXT: s_endpgm
;
; GFX9-LABEL: s_anyext_i16_i32:
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x24
; GFX9-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x34
; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX9-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0
; GFX9-NEXT: v_lshlrev_b32_e32 v1, 1, v1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: global_load_ushort v2, v0, s[6:7]
; GFX9-NEXT: global_load_ushort v3, v1, s[0:1]
; GFX9-NEXT: s_mov_b32 s7, 0xf000
; GFX9-NEXT: s_mov_b32 s6, -1
; GFX9-NEXT: global_load_ushort v2, v0, s[2:3]
; GFX9-NEXT: global_load_ushort v3, v1, s[6:7]
; GFX9-NEXT: s_mov_b32 s3, 0xf000
; GFX9-NEXT: s_mov_b32 s2, -1
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: v_add_u16_e32 v0, v2, v3
; GFX9-NEXT: v_xor_b32_e32 v0, -1, v0
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT: buffer_store_dword v0, off, s[4:7], 0
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX9-NEXT: s_endpgm
entry:
%tid.x = call i32 @llvm.amdgcn.workitem.id.x()
Expand Down
Loading