Skip to content

Commit c687660

Browse files
authored
AMDGPU: Add baseline test for split/widen invariant loads (#168913)
This works fine on main, but broke after a future patch.
1 parent 5dbe83c commit c687660

File tree

1 file changed

+77
-0
lines changed

1 file changed

+77
-0
lines changed
Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s
3+
4+
; Check that invariant global loads are treated like constant
5+
; loads. The optnone disables the annotation of amdgpu.noclobber.
6+
7+
; Loads a <3 x i64> (align 32) from the addrspace(4) pointer %src and stores
; it to the addrspace(1) pointer %dst. The checks below expect the load to be
; emitted as two scalar loads: s_load_dwordx4 plus s_load_dwordx2 at 0x10.
define amdgpu_kernel void @load_constant_v3i64(ptr addrspace(1) %dst, ptr addrspace(4) %src) #0 {
8+
; CHECK-LABEL: load_constant_v3i64:
9+
; CHECK: ; %bb.0:
10+
; CHECK-NEXT: v_mov_b32_e32 v4, 0
11+
; CHECK-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
12+
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x8
13+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
14+
; CHECK-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
15+
; CHECK-NEXT: s_nop 0
16+
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x10
17+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
18+
; CHECK-NEXT: v_mov_b32_e32 v0, s6
19+
; CHECK-NEXT: v_mov_b32_e32 v1, s7
20+
; CHECK-NEXT: global_store_dwordx2 v4, v[0:1], s[4:5] offset:16
21+
; CHECK-NEXT: v_mov_b32_e32 v0, s0
22+
; CHECK-NEXT: v_mov_b32_e32 v1, s1
23+
; CHECK-NEXT: v_mov_b32_e32 v2, s2
24+
; CHECK-NEXT: v_mov_b32_e32 v3, s3
25+
; CHECK-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5]
26+
; CHECK-NEXT: s_endpgm
27+
%ld = load <3 x i64>, ptr addrspace(4) %src, align 32
28+
store <3 x i64> %ld, ptr addrspace(1) %dst, align 32
29+
ret void
30+
}
31+
32+
; Loads a <3 x i64> (align 32) from the addrspace(1) pointer %src, with no
; !invariant.load metadata, and stores it to %dst. The checks below expect
; the data to be fetched with global_load_dwordx4 plus global_load_dwordx2
; at offset:16.
define amdgpu_kernel void @load_global_v3i64(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
33+
; CHECK-LABEL: load_global_v3i64:
34+
; CHECK: ; %bb.0:
35+
; CHECK-NEXT: v_mov_b32_e32 v6, 0
36+
; CHECK-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
37+
; CHECK-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x8
38+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
39+
; CHECK-NEXT: global_load_dwordx4 v[0:3], v6, s[2:3]
40+
; CHECK-NEXT: global_load_dwordx2 v[4:5], v6, s[2:3] offset:16
41+
; CHECK-NEXT: s_waitcnt vmcnt(0)
42+
; CHECK-NEXT: global_store_dwordx2 v6, v[4:5], s[0:1] offset:16
43+
; CHECK-NEXT: global_store_dwordx4 v6, v[0:3], s[0:1]
44+
; CHECK-NEXT: s_endpgm
45+
%ld = load <3 x i64>, ptr addrspace(1) %src, align 32
46+
store <3 x i64> %ld, ptr addrspace(1) %dst, align 32
47+
ret void
48+
}
49+
50+
; Loads a <3 x i64> (align 32) from the addrspace(1) pointer %src with
; !invariant.load attached, and stores it to %dst. The checks below expect
; the same s_load_dwordx4 / s_load_dwordx2 sequence that is checked for
; @load_constant_v3i64.
define amdgpu_kernel void @load_global_v3i64_invariant(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
51+
; CHECK-LABEL: load_global_v3i64_invariant:
52+
; CHECK: ; %bb.0:
53+
; CHECK-NEXT: v_mov_b32_e32 v4, 0
54+
; CHECK-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x0
55+
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[8:9], 0x8
56+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
57+
; CHECK-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
58+
; CHECK-NEXT: s_nop 0
59+
; CHECK-NEXT: s_load_dwordx2 s[6:7], s[6:7], 0x10
60+
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
61+
; CHECK-NEXT: v_mov_b32_e32 v0, s6
62+
; CHECK-NEXT: v_mov_b32_e32 v1, s7
63+
; CHECK-NEXT: global_store_dwordx2 v4, v[0:1], s[4:5] offset:16
64+
; CHECK-NEXT: v_mov_b32_e32 v0, s0
65+
; CHECK-NEXT: v_mov_b32_e32 v1, s1
66+
; CHECK-NEXT: v_mov_b32_e32 v2, s2
67+
; CHECK-NEXT: v_mov_b32_e32 v3, s3
68+
; CHECK-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5]
69+
; CHECK-NEXT: s_endpgm
; The !invariant.load tag is the only difference from the load in
; @load_global_v3i64.
%ld = load <3 x i64>, ptr addrspace(1) %src, align 32, !invariant.load !0
71+
store <3 x i64> %ld, ptr addrspace(1) %dst, align 32
72+
ret void
73+
}
74+
75+
; Attribute group #0 is applied to every kernel in this file. Per the note at
; the top of the file, optnone is what disables the amdgpu.noclobber
; annotation.
attributes #0 = { noinline nounwind optnone }
76+
77+
; Empty metadata node used as the operand of !invariant.load.
!0 = !{}

0 commit comments

Comments
 (0)