Skip to content

Conversation

@arsenm
Copy link
Contributor

@arsenm arsenm commented Nov 20, 2025

This works fine on main, but broke after a future patch.

@llvmbot
Copy link
Member

llvmbot commented Nov 20, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/168913.diff

1 Files Affected:

  • (added) llvm/test/CodeGen/AMDGPU/load-global-invariant.ll (+70)
diff --git a/llvm/test/CodeGen/AMDGPU/load-global-invariant.ll b/llvm/test/CodeGen/AMDGPU/load-global-invariant.ll
new file mode 100644
index 0000000000000..b881edde0f448
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/load-global-invariant.ll
@@ -0,0 +1,70 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s
+
+; Check that invariant global loads are treated like constant
+; loads. The optnone disables the annotation of amdgpu.noclobber.
+
+define amdgpu_kernel void @load_constant_v3i64(ptr addrspace(1) %dst, ptr addrspace(4) %src) #0 {
+; CHECK-LABEL: load_constant_v3i64:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    v_mov_b32_e32 v4, 0
+; CHECK-NEXT:    s_load_dwordx2 s[4:5], s[8:9], 0x0
+; CHECK-NEXT:    s_load_dwordx2 s[6:7], s[8:9], 0x8
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    s_load_dwordx4 s[0:3], s[6:7], 0x0
+; CHECK-NEXT:    s_nop 0
+; CHECK-NEXT:    s_load_dwordx2 s[6:7], s[6:7], 0x10
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    v_mov_b32_e32 v0, s6
+; CHECK-NEXT:    v_mov_b32_e32 v1, s7
+; CHECK-NEXT:    global_store_dwordx2 v4, v[0:1], s[4:5] offset:16
+; CHECK-NEXT:    v_mov_b32_e32 v0, s0
+; CHECK-NEXT:    v_mov_b32_e32 v1, s1
+; CHECK-NEXT:    v_mov_b32_e32 v2, s2
+; CHECK-NEXT:    v_mov_b32_e32 v3, s3
+; CHECK-NEXT:    global_store_dwordx4 v4, v[0:3], s[4:5]
+; CHECK-NEXT:    s_endpgm
+  %ld = load <3 x i64>, ptr addrspace(4) %src, align 32
+  store <3 x i64> %ld, ptr addrspace(1) %dst, align 32
+  ret void
+}
+
+define amdgpu_kernel void @load_global_v3i64(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+; CHECK-LABEL: load_global_v3i64:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    v_mov_b32_e32 v6, 0
+; CHECK-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
+; CHECK-NEXT:    s_load_dwordx2 s[2:3], s[8:9], 0x8
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    global_load_dwordx4 v[0:3], v6, s[2:3]
+; CHECK-NEXT:    global_load_dwordx2 v[4:5], v6, s[2:3] offset:16
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    global_store_dwordx2 v6, v[4:5], s[0:1] offset:16
+; CHECK-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1]
+; CHECK-NEXT:    s_endpgm
+  %ld = load <3 x i64>, ptr addrspace(1) %src, align 32
+  store <3 x i64> %ld, ptr addrspace(1) %dst, align 32
+  ret void
+}
+
+define amdgpu_kernel void @load_global_v3i64_invariant(ptr addrspace(1) %dst, ptr addrspace(1) %src) #0 {
+; CHECK-LABEL: load_global_v3i64_invariant:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    v_mov_b32_e32 v6, 0
+; CHECK-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
+; CHECK-NEXT:    s_load_dwordx2 s[2:3], s[8:9], 0x8
+; CHECK-NEXT:    s_waitcnt lgkmcnt(0)
+; CHECK-NEXT:    global_load_dwordx4 v[0:3], v6, s[2:3]
+; CHECK-NEXT:    global_load_dwordx2 v[4:5], v6, s[2:3] offset:16
+; CHECK-NEXT:    s_waitcnt vmcnt(0)
+; CHECK-NEXT:    global_store_dwordx2 v6, v[4:5], s[0:1] offset:16
+; CHECK-NEXT:    global_store_dwordx4 v6, v[0:3], s[0:1]
+; CHECK-NEXT:    s_endpgm
+  %ld = load <3 x i64>, ptr addrspace(1) %src, align 32, !invariant.load !0
+  store <3 x i64> %ld, ptr addrspace(1) %dst, align 32
+  ret void
+}
+
+attributes #0 = { noinline nounwind optnone }
+
+!0 = !{}

@github-actions
Copy link

github-actions bot commented Nov 20, 2025

🐧 Linux x64 Test Results

  • 186428 tests passed
  • 4868 tests skipped

Base automatically changed from users/arsenm/amdgpu/handle-invariant-load-isUniformLoad to main November 20, 2025 18:07
@arsenm arsenm force-pushed the users/arsenm/amdgpu/add-baseline-test-invariant-vector-load branch from dd3f339 to 1c8ddb0 Compare November 20, 2025 18:59
@arsenm arsenm force-pushed the users/arsenm/amdgpu/add-baseline-test-invariant-vector-load branch from 1c8ddb0 to a8b806c Compare November 21, 2025 00:58
@arsenm arsenm merged commit c687660 into main Nov 21, 2025
10 checks passed
@arsenm arsenm deleted the users/arsenm/amdgpu/add-baseline-test-invariant-vector-load branch November 21, 2025 18:53
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants