229 changes: 126 additions & 103 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i16.ll

Large diffs are not rendered by default.

308 changes: 273 additions & 35 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-load-global.mir

Large diffs are not rendered by default.

216 changes: 188 additions & 28 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-store-global.mir

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.fmas.ll
Original file line number Diff line number Diff line change
Expand Up @@ -748,11 +748,11 @@ define amdgpu_kernel void @test_div_fmas_f64(ptr addrspace(1) %out, double %a, d
; GFX7-NEXT: v_mov_b32_e32 v3, s9
; GFX7-NEXT: v_mov_b32_e32 v5, s11
; GFX7-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0
; GFX7-NEXT: s_nop 3
; GFX7-NEXT: s_mov_b32 s6, -1
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_nop 1
; GFX7-NEXT: v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; GFX7-NEXT: v_mov_b32_e32 v2, s4
; GFX7-NEXT: v_mov_b32_e32 v3, s5
; GFX7-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GFX7-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GFX7-NEXT: s_endpgm
;
; GFX8-LABEL: test_div_fmas_f64:
Expand Down
162 changes: 78 additions & 84 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.div.scale.ll
Original file line number Diff line number Diff line change
Expand Up @@ -174,24 +174,21 @@ define amdgpu_kernel void @test_div_scale_f32_2(ptr addrspace(1) %out, ptr addrs
define amdgpu_kernel void @test_div_scale_f64_1(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %in) {
; GFX7-LABEL: test_div_scale_f64_1:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xd
; GFX7-NEXT: v_lshlrev_b32_e32 v2, 3, v0
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GFX7-NEXT: v_mov_b32_e32 v1, 0
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s2
; GFX7-NEXT: v_mov_b32_e32 v1, s3
; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX7-NEXT: v_add_i32_e32 v2, vcc, 8, v0
; GFX7-NEXT: v_addc_u32_e32 v3, vcc, 0, v1, vcc
; GFX7-NEXT: flat_load_dwordx2 v[0:1], v[0:1] glc
; GFX7-NEXT: buffer_load_dwordx2 v[2:3], v[0:1], s[4:7], 0 addr64 glc
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: flat_load_dwordx2 v[2:3], v[2:3] glc
; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:8 glc
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_div_scale_f64 v[0:1], s[2:3], v[2:3], v[2:3], v[0:1]
; GFX7-NEXT: v_mov_b32_e32 v3, s1
; GFX7-NEXT: v_mov_b32_e32 v2, s0
; GFX7-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX7-NEXT: s_mov_b32 s6, -1
; GFX7-NEXT: v_div_scale_f64 v[0:1], s[0:1], v[0:1], v[0:1], v[2:3]
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GFX7-NEXT: s_endpgm
;
; GFX8-LABEL: test_div_scale_f64_1:
Expand Down Expand Up @@ -263,24 +260,21 @@ define amdgpu_kernel void @test_div_scale_f64_1(ptr addrspace(1) %out, ptr addrs
define amdgpu_kernel void @test_div_scale_f64_2(ptr addrspace(1) %out, ptr addrspace(1) %aptr, ptr addrspace(1) %in) {
; GFX7-LABEL: test_div_scale_f64_2:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xd
; GFX7-NEXT: v_lshlrev_b32_e32 v2, 3, v0
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xd
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GFX7-NEXT: v_mov_b32_e32 v1, 0
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s2
; GFX7-NEXT: v_mov_b32_e32 v1, s3
; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX7-NEXT: v_add_i32_e32 v2, vcc, 8, v0
; GFX7-NEXT: v_addc_u32_e32 v3, vcc, 0, v1, vcc
; GFX7-NEXT: flat_load_dwordx2 v[0:1], v[0:1] glc
; GFX7-NEXT: buffer_load_dwordx2 v[2:3], v[0:1], s[4:7], 0 addr64 glc
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: flat_load_dwordx2 v[2:3], v[2:3] glc
; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 offset:8 glc
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_div_scale_f64 v[0:1], s[2:3], v[0:1], v[2:3], v[0:1]
; GFX7-NEXT: v_mov_b32_e32 v3, s1
; GFX7-NEXT: v_mov_b32_e32 v2, s0
; GFX7-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX7-NEXT: s_mov_b32 s6, -1
; GFX7-NEXT: v_div_scale_f64 v[0:1], s[0:1], v[2:3], v[0:1], v[2:3]
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GFX7-NEXT: s_endpgm
;
; GFX8-LABEL: test_div_scale_f64_2:
Expand Down Expand Up @@ -649,19 +643,19 @@ define amdgpu_kernel void @test_div_scale_f64_scalar_num_1(ptr addrspace(1) %out
; GFX7-LABEL: test_div_scale_f64_scalar_num_1:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x15
; GFX7-NEXT: v_lshlrev_b32_e32 v2, 3, v0
; GFX7-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x15
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GFX7-NEXT: v_mov_b32_e32 v1, 0
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s6
; GFX7-NEXT: v_mov_b32_e32 v1, s7
; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX7-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
; GFX7-NEXT: v_mov_b32_e32 v2, s4
; GFX7-NEXT: v_mov_b32_e32 v3, s5
; GFX7-NEXT: s_mov_b64 s[0:1], s[6:7]
; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[0:3], 0 addr64
; GFX7-NEXT: s_mov_b32 s2, -1
; GFX7-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_div_scale_f64 v[0:1], s[0:1], v[0:1], v[0:1], s[0:1]
; GFX7-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GFX7-NEXT: v_div_scale_f64 v[0:1], s[0:1], v[0:1], v[0:1], s[8:9]
; GFX7-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GFX7-NEXT: s_endpgm
;
; GFX8-LABEL: test_div_scale_f64_scalar_num_1:
Expand Down Expand Up @@ -724,19 +718,19 @@ define amdgpu_kernel void @test_div_scale_f64_scalar_num_2(ptr addrspace(1) %out
; GFX7-LABEL: test_div_scale_f64_scalar_num_2:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x15
; GFX7-NEXT: v_lshlrev_b32_e32 v2, 3, v0
; GFX7-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x15
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GFX7-NEXT: v_mov_b32_e32 v1, 0
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s6
; GFX7-NEXT: v_mov_b32_e32 v1, s7
; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX7-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
; GFX7-NEXT: v_mov_b32_e32 v2, s4
; GFX7-NEXT: v_mov_b32_e32 v3, s5
; GFX7-NEXT: s_mov_b64 s[0:1], s[6:7]
; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[0:3], 0 addr64
; GFX7-NEXT: s_mov_b32 s2, -1
; GFX7-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_div_scale_f64 v[0:1], s[0:1], s[0:1], v[0:1], s[0:1]
; GFX7-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GFX7-NEXT: v_div_scale_f64 v[0:1], s[0:1], s[8:9], v[0:1], s[8:9]
; GFX7-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GFX7-NEXT: s_endpgm
;
; GFX8-LABEL: test_div_scale_f64_scalar_num_2:
Expand Down Expand Up @@ -799,19 +793,19 @@ define amdgpu_kernel void @test_div_scale_f64_scalar_den_1(ptr addrspace(1) %out
; GFX7-LABEL: test_div_scale_f64_scalar_den_1:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x15
; GFX7-NEXT: v_lshlrev_b32_e32 v2, 3, v0
; GFX7-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x15
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GFX7-NEXT: v_mov_b32_e32 v1, 0
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s6
; GFX7-NEXT: v_mov_b32_e32 v1, s7
; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX7-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
; GFX7-NEXT: v_mov_b32_e32 v2, s4
; GFX7-NEXT: v_mov_b32_e32 v3, s5
; GFX7-NEXT: s_mov_b64 s[0:1], s[6:7]
; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[0:3], 0 addr64
; GFX7-NEXT: s_mov_b32 s2, -1
; GFX7-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_div_scale_f64 v[0:1], s[0:1], s[0:1], s[0:1], v[0:1]
; GFX7-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GFX7-NEXT: v_div_scale_f64 v[0:1], s[0:1], s[8:9], s[8:9], v[0:1]
; GFX7-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GFX7-NEXT: s_endpgm
;
; GFX8-LABEL: test_div_scale_f64_scalar_den_1:
Expand Down Expand Up @@ -874,19 +868,19 @@ define amdgpu_kernel void @test_div_scale_f64_scalar_den_2(ptr addrspace(1) %out
; GFX7-LABEL: test_div_scale_f64_scalar_den_2:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x9
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x15
; GFX7-NEXT: v_lshlrev_b32_e32 v2, 3, v0
; GFX7-NEXT: s_load_dwordx2 s[8:9], s[0:1], 0x15
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 3, v0
; GFX7-NEXT: v_mov_b32_e32 v1, 0
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s6
; GFX7-NEXT: v_mov_b32_e32 v1, s7
; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX7-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
; GFX7-NEXT: v_mov_b32_e32 v2, s4
; GFX7-NEXT: v_mov_b32_e32 v3, s5
; GFX7-NEXT: s_mov_b64 s[0:1], s[6:7]
; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[0:3], 0 addr64
; GFX7-NEXT: s_mov_b32 s2, -1
; GFX7-NEXT: s_mov_b64 s[6:7], s[2:3]
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_div_scale_f64 v[0:1], s[0:1], v[0:1], s[0:1], v[0:1]
; GFX7-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GFX7-NEXT: v_div_scale_f64 v[0:1], s[0:1], v[0:1], s[8:9], v[0:1]
; GFX7-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; GFX7-NEXT: s_endpgm
;
; GFX8-LABEL: test_div_scale_f64_scalar_den_2:
Expand Down Expand Up @@ -1071,9 +1065,9 @@ define amdgpu_kernel void @test_div_scale_f64_all_scalar_1(ptr addrspace(1) %out
; GFX7-NEXT: v_mov_b32_e32 v0, s2
; GFX7-NEXT: v_mov_b32_e32 v1, s3
; GFX7-NEXT: v_div_scale_f64 v[0:1], s[2:3], v[0:1], v[0:1], s[4:5]
; GFX7-NEXT: v_mov_b32_e32 v3, s1
; GFX7-NEXT: v_mov_b32_e32 v2, s0
; GFX7-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GFX7-NEXT: s_mov_b32 s2, -1
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GFX7-NEXT: s_endpgm
;
; GFX8-LABEL: test_div_scale_f64_all_scalar_1:
Expand Down Expand Up @@ -1131,9 +1125,9 @@ define amdgpu_kernel void @test_div_scale_f64_all_scalar_2(ptr addrspace(1) %out
; GFX7-NEXT: v_mov_b32_e32 v0, s2
; GFX7-NEXT: v_mov_b32_e32 v1, s3
; GFX7-NEXT: v_div_scale_f64 v[0:1], s[2:3], s[4:5], v[0:1], s[4:5]
; GFX7-NEXT: v_mov_b32_e32 v3, s1
; GFX7-NEXT: v_mov_b32_e32 v2, s0
; GFX7-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GFX7-NEXT: s_mov_b32 s2, -1
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GFX7-NEXT: s_endpgm
;
; GFX8-LABEL: test_div_scale_f64_all_scalar_2:
Expand Down Expand Up @@ -1644,14 +1638,14 @@ define amdgpu_kernel void @test_div_scale_f32_undef_undef_val(ptr addrspace(1) %
define amdgpu_kernel void @test_div_scale_f64_val_undef_val(ptr addrspace(1) %out) #0 {
; GFX7-LABEL: test_div_scale_f64_val_undef_val:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX7-NEXT: s_mov_b32 s2, 0
; GFX7-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; GFX7-NEXT: s_mov_b32 s3, 0x40200000
; GFX7-NEXT: v_div_scale_f64 v[0:1], s[2:3], v[0:1], v[0:1], s[2:3]
; GFX7-NEXT: s_mov_b32 s2, -1
; GFX7-NEXT: s_mov_b32 s3, 0xf000
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v3, s1
; GFX7-NEXT: v_mov_b32_e32 v2, s0
; GFX7-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GFX7-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GFX7-NEXT: s_endpgm
;
; GFX8-LABEL: test_div_scale_f64_val_undef_val:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ define amdgpu_kernel void @set_inactive_64(ptr addrspace(1) %out, i64 %in) {
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: s_not_b64 exec, exec
; GCN-NEXT: v_mov_b32_e32 v3, s1
; GCN-NEXT: v_mov_b32_e32 v2, s0
; GCN-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GCN-NEXT: s_mov_b32 s2, -1
; GCN-NEXT: s_mov_b32 s3, 0xf000
; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GCN-NEXT: s_endpgm
%tmp = call i64 @llvm.amdgcn.set.inactive.i64(i64 %in, i64 0) #0
store i64 %tmp, ptr addrspace(1) %out
Expand Down
303 changes: 301 additions & 2 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/load-constant.96.ll

Large diffs are not rendered by default.

15 changes: 6 additions & 9 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/shl-ext-reduce.ll
Original file line number Diff line number Diff line change
Expand Up @@ -329,16 +329,13 @@ define amdgpu_kernel void @muli24_shl64(ptr addrspace(1) nocapture %arg, ptr add
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_mov_b64 s[4:5], s[2:3]
; GFX7-NEXT: buffer_load_dword v1, v[1:2], s[4:7], 0 addr64
; GFX7-NEXT: v_lshlrev_b32_e32 v5, 3, v0
; GFX7-NEXT: v_mov_b32_e32 v4, s1
; GFX7-NEXT: v_mov_b32_e32 v3, s0
; GFX7-NEXT: s_mov_b64 s[2:3], s[6:7]
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: v_or_b32_e32 v0, 0xff800000, v1
; GFX7-NEXT: v_mul_i32_i24_e32 v1, -7, v0
; GFX7-NEXT: v_lshl_b64 v[0:1], v[1:2], 3
; GFX7-NEXT: v_add_i32_e32 v2, vcc, v3, v5
; GFX7-NEXT: v_addc_u32_e32 v3, vcc, 0, v4, vcc
; GFX7-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GFX7-NEXT: v_or_b32_e32 v1, 0xff800000, v1
; GFX7-NEXT: v_mul_i32_i24_e32 v1, -7, v1
; GFX7-NEXT: v_lshl_b64 v[3:4], v[1:2], 3
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 3, v0
; GFX7-NEXT: buffer_store_dwordx2 v[3:4], v[1:2], s[0:3], 0 addr64
; GFX7-NEXT: s_endpgm
;
; GFX8-LABEL: muli24_shl64:
Expand Down
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/AMDGPU/GlobalISel/smrd.ll
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
; FIXME: Need to add support for mubuf stores to enable this on SI.
; XUN: llc < %s -march=amdgcn -mcpu=tahiti -show-mc-encoding -verify-machineinstrs -global-isel | FileCheck --check-prefixes=SI,GCN %s
; RUN: llc < %s -march=amdgcn -mcpu=tahiti -show-mc-encoding -verify-machineinstrs -global-isel | FileCheck --check-prefixes=SI,GCN %s
; RUN: llc < %s -march=amdgcn -mcpu=bonaire -show-mc-encoding -verify-machineinstrs -global-isel | FileCheck --check-prefixes=CI,GCN,SICIVI %s
; RUN: llc < %s -march=amdgcn -mcpu=tonga -show-mc-encoding -verify-machineinstrs -global-isel | FileCheck --check-prefixes=VI,GCN,SICIVI %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -show-mc-encoding -verify-machineinstrs -global-isel < %s | FileCheck --check-prefixes=GFX9_10,GCN,VIGFX9_10,SIVIGFX9_10 %s
Expand Down
3,499 changes: 2,338 additions & 1,161 deletions llvm/test/CodeGen/AMDGPU/fmed3.ll

Large diffs are not rendered by default.

169 changes: 151 additions & 18 deletions llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -global-isel=0 -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=SI-SDAG %s
; TODO: Crashes on selecting G_STORE.
; RUN: not --crash llc -amdgpu-scalarize-global-loads=false -march=amdgcn -global-isel=1 -verify-machineinstrs -enable-unsafe-fp-math < %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -global-isel=1 -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=SI-GISEL %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -global-isel=0 -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=VI-SDAG %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=fiji -global-isel=1 -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=VI-GISEL %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -global-isel=0 -mattr=-flat-for-global -denormal-fp-math=preserve-sign -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-SDAG %s
Expand All @@ -28,6 +27,18 @@ define amdgpu_kernel void @fptrunc_f32_to_f16(
; SI-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0
; SI-SDAG-NEXT: s_endpgm
;
; SI-GISEL-LABEL: fptrunc_f32_to_f16:
; SI-GISEL: ; %bb.0: ; %entry
; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: s_load_dword s3, s[2:3], 0x0
; SI-GISEL-NEXT: s_mov_b32 s2, -1
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, s3
; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
; SI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
; SI-GISEL-NEXT: s_endpgm
;
; VI-SDAG-LABEL: fptrunc_f32_to_f16:
; VI-SDAG: ; %bb.0: ; %entry
; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
Expand Down Expand Up @@ -150,6 +161,19 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(
; SI-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0
; SI-SDAG-NEXT: s_endpgm
;
; SI-GISEL-LABEL: fptrunc_f64_to_f16:
; SI-GISEL: ; %bb.0: ; %entry
; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
; SI-GISEL-NEXT: s_mov_b32 s2, -1
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[4:5]
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
; SI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
; SI-GISEL-NEXT: s_endpgm
;
; VI-SDAG-LABEL: fptrunc_f64_to_f16:
; VI-SDAG: ; %bb.0: ; %entry
; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
Expand Down Expand Up @@ -282,6 +306,21 @@ define amdgpu_kernel void @fptrunc_v2f32_to_v2f16(
; SI-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-SDAG-NEXT: s_endpgm
;
; SI-GISEL-LABEL: fptrunc_v2f32_to_v2f16:
; SI-GISEL: ; %bb.0: ; %entry
; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
; SI-GISEL-NEXT: s_mov_b32 s2, -1
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, s4
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, s5
; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; SI-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-GISEL-NEXT: s_endpgm
;
; VI-SDAG-LABEL: fptrunc_v2f32_to_v2f16:
; VI-SDAG: ; %bb.0: ; %entry
; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
Expand Down Expand Up @@ -311,10 +350,10 @@ define amdgpu_kernel void @fptrunc_v2f32_to_v2f16(
; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3
; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, s2
; VI-GISEL-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; VI-GISEL-NEXT: v_or_b32_e32 v2, v0, v1
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: s_mov_b32 s2, -1
; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000
; VI-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
; VI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-GISEL-NEXT: s_endpgm
;
; GFX9-SDAG-LABEL: fptrunc_v2f32_to_v2f16:
Expand Down Expand Up @@ -345,9 +384,10 @@ define amdgpu_kernel void @fptrunc_v2f32_to_v2f16(
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, s2
; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v1, s3
; GFX9-GISEL-NEXT: s_mov_b32 s2, -1
; GFX9-GISEL-NEXT: s_mov_b32 s3, 0xf000
; GFX9-GISEL-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX9-GISEL-NEXT: s_endpgm
;
; GFX11-SDAG-LABEL: fptrunc_v2f32_to_v2f16:
Expand Down Expand Up @@ -381,10 +421,11 @@ define amdgpu_kernel void @fptrunc_v2f32_to_v2f16(
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: v_cvt_f16_f32_e32 v0, s2
; GFX11-GISEL-NEXT: v_cvt_f16_f32_e32 v1, s3
; GFX11-GISEL-NEXT: s_mov_b32 s2, -1
; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX11-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX11-GISEL-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
Expand Down Expand Up @@ -421,6 +462,23 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16(
; SI-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-SDAG-NEXT: s_endpgm
;
; SI-GISEL-LABEL: fptrunc_v2f64_to_v2f16:
; SI-GISEL: ; %bb.0: ; %entry
; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
; SI-GISEL-NEXT: s_mov_b32 s2, -1
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[4:5]
; SI-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[6:7]
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; SI-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-GISEL-NEXT: s_endpgm
;
; VI-SDAG-LABEL: fptrunc_v2f64_to_v2f16:
; VI-SDAG: ; %bb.0: ; %entry
; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
Expand Down Expand Up @@ -448,15 +506,15 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16(
; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
; VI-GISEL-NEXT: s_mov_b32 s2, -1
; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[4:5]
; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[6:7]
; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
; VI-GISEL-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; VI-GISEL-NEXT: v_or_b32_e32 v2, v0, v1
; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0
; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1
; VI-GISEL-NEXT: flat_store_dword v[0:1], v2
; VI-GISEL-NEXT: v_or_b32_e32 v0, v0, v1
; VI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-GISEL-NEXT: s_endpgm
;
; GFX9-SDAG-LABEL: fptrunc_v2f64_to_v2f16:
Expand Down Expand Up @@ -486,14 +544,15 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16(
; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
; GFX9-GISEL-NEXT: s_mov_b32 s2, -1
; GFX9-GISEL-NEXT: s_mov_b32 s3, 0xf000
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[4:5]
; GFX9-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[6:7]
; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX9-GISEL-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9-GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX9-GISEL-NEXT: s_endpgm
;
; GFX11-SDAG-LABEL: fptrunc_v2f64_to_v2f16:
Expand Down Expand Up @@ -528,6 +587,8 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16(
; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[0:1], 0x24
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: s_load_b128 s[4:7], s[2:3], 0x0
; GFX11-GISEL-NEXT: s_mov_b32 s2, -1
; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[4:5]
; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[6:7]
Expand All @@ -536,8 +597,7 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16(
; GFX11-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT: v_pack_b32_f16 v0, v0, v1
; GFX11-GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX11-GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX11-GISEL-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-GISEL-NEXT: s_nop 0
; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11-GISEL-NEXT: s_endpgm
Expand Down Expand Up @@ -569,6 +629,18 @@ define amdgpu_kernel void @fneg_fptrunc_f32_to_f16(
; SI-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0
; SI-SDAG-NEXT: s_endpgm
;
; SI-GISEL-LABEL: fneg_fptrunc_f32_to_f16:
; SI-GISEL: ; %bb.0: ; %entry
; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: s_load_dword s3, s[2:3], 0x0
; SI-GISEL-NEXT: s_mov_b32 s2, -1
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_cvt_f16_f32_e64 v0, -s3
; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
; SI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
; SI-GISEL-NEXT: s_endpgm
;
; VI-SDAG-LABEL: fneg_fptrunc_f32_to_f16:
; VI-SDAG: ; %bb.0: ; %entry
; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
Expand Down Expand Up @@ -691,6 +763,18 @@ define amdgpu_kernel void @fabs_fptrunc_f32_to_f16(
; SI-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0
; SI-SDAG-NEXT: s_endpgm
;
; SI-GISEL-LABEL: fabs_fptrunc_f32_to_f16:
; SI-GISEL: ; %bb.0: ; %entry
; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: s_load_dword s3, s[2:3], 0x0
; SI-GISEL-NEXT: s_mov_b32 s2, -1
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_cvt_f16_f32_e64 v0, |s3|
; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
; SI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
; SI-GISEL-NEXT: s_endpgm
;
; VI-SDAG-LABEL: fabs_fptrunc_f32_to_f16:
; VI-SDAG: ; %bb.0: ; %entry
; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
Expand Down Expand Up @@ -813,6 +897,18 @@ define amdgpu_kernel void @fneg_fabs_fptrunc_f32_to_f16(
; SI-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0
; SI-SDAG-NEXT: s_endpgm
;
; SI-GISEL-LABEL: fneg_fabs_fptrunc_f32_to_f16:
; SI-GISEL: ; %bb.0: ; %entry
; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: s_load_dword s3, s[2:3], 0x0
; SI-GISEL-NEXT: s_mov_b32 s2, -1
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_cvt_f16_f32_e64 v0, -|s3|
; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
; SI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
; SI-GISEL-NEXT: s_endpgm
;
; VI-SDAG-LABEL: fneg_fabs_fptrunc_f32_to_f16:
; VI-SDAG: ; %bb.0: ; %entry
; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
Expand Down Expand Up @@ -936,6 +1032,18 @@ define amdgpu_kernel void @fptrunc_f32_to_f16_zext_i32(
; SI-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-SDAG-NEXT: s_endpgm
;
; SI-GISEL-LABEL: fptrunc_f32_to_f16_zext_i32:
; SI-GISEL: ; %bb.0: ; %entry
; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: s_load_dword s3, s[2:3], 0x0
; SI-GISEL-NEXT: s_mov_b32 s2, -1
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, s3
; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-GISEL-NEXT: s_endpgm
;
; VI-SDAG-LABEL: fptrunc_f32_to_f16_zext_i32:
; VI-SDAG: ; %bb.0: ; %entry
; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
Expand Down Expand Up @@ -1063,6 +1171,18 @@ define amdgpu_kernel void @fptrunc_fabs_f32_to_f16_zext_i32(
; SI-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-SDAG-NEXT: s_endpgm
;
; SI-GISEL-LABEL: fptrunc_fabs_f32_to_f16_zext_i32:
; SI-GISEL: ; %bb.0: ; %entry
; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: s_load_dword s3, s[2:3], 0x0
; SI-GISEL-NEXT: s_mov_b32 s2, -1
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_cvt_f16_f32_e64 v0, |s3|
; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-GISEL-NEXT: s_endpgm
;
; VI-SDAG-LABEL: fptrunc_fabs_f32_to_f16_zext_i32:
; VI-SDAG: ; %bb.0: ; %entry
; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
Expand Down Expand Up @@ -1192,6 +1312,19 @@ define amdgpu_kernel void @fptrunc_f32_to_f16_sext_i32(
; SI-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0
; SI-SDAG-NEXT: s_endpgm
;
; SI-GISEL-LABEL: fptrunc_f32_to_f16_sext_i32:
; SI-GISEL: ; %bb.0: ; %entry
; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: s_load_dword s3, s[2:3], 0x0
; SI-GISEL-NEXT: s_mov_b32 s2, -1
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, s3
; SI-GISEL-NEXT: v_bfe_i32 v0, v0, 0, 16
; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-GISEL-NEXT: s_endpgm
;
; VI-SDAG-LABEL: fptrunc_f32_to_f16_sext_i32:
; VI-SDAG: ; %bb.0: ; %entry
; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
Expand Down
5 changes: 3 additions & 2 deletions llvm/test/CodeGen/AMDGPU/v_pack.ll
Original file line number Diff line number Diff line change
Expand Up @@ -131,9 +131,10 @@ define amdgpu_kernel void @fptrunc(
; GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GISEL-NEXT: v_cvt_f16_f32_e32 v0, s2
; GISEL-NEXT: v_cvt_f16_f32_e32 v1, s3
; GISEL-NEXT: s_mov_b32 s2, -1
; GISEL-NEXT: s_mov_b32 s3, 0x31016000
; GISEL-NEXT: v_pack_b32_f16 v0, v0, v1
; GISEL-NEXT: v_mov_b32_e32 v1, 0
; GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GISEL-NEXT: s_endpgm
ptr addrspace(1) %r,
ptr addrspace(1) %a) {
Expand Down