Skip to content

Conversation

arsenm
Copy link
Contributor

@arsenm arsenm commented Oct 8, 2025

Apparently the IR verifier doesn't enforce the correct structure.
Also I do not know why we have this extra level of wrapper in the intrinsic,
it just makes it harder to get at the string. I also do not know why
kokkos is using these intrinsics, but it shouldn't.

…ster

Apparently the IR verifier doesn't enforce the correct structure.
Also I do not know why we have this extra level of wrapper in the intrinsic,
it just makes it harder to get at the string. I also do not know why
kokkos is using these intrinsics, but it shouldn't.
@arsenm arsenm marked this pull request as ready for review October 8, 2025 02:42
Copy link
Contributor Author

arsenm commented Oct 8, 2025

This stack of pull requests is managed by Graphite. Learn more about stacking.

@llvmbot
Copy link
Member

llvmbot commented Oct 8, 2025

@llvm/pr-subscribers-backend-amdgpu

Author: Matt Arsenault (arsenm)

Changes

Apparently the IR verifier doesn't enforce the correct structure.
Also I do not know why we have this extra level of wrapper in the intrinsic,
it just makes it harder to get at the string. I also do not know why
kokkos is using these intrinsics, but it shouldn't.


Full diff: https://github.com/llvm/llvm-project/pull/162414.diff

2 Files Affected:

  • (modified) llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp (+3-1)
  • (modified) llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll (+27-14)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 388b106bab3e1..ef58004dd563c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -1335,7 +1335,9 @@ struct AAAMDGPUNoAGPR : public StateWrapper<BooleanState, AbstractAttribute> {
       case Intrinsic::read_register:
       case Intrinsic::read_volatile_register: {
         const MDString *RegName = cast<MDString>(
-            cast<MetadataAsValue>(CB.getArgOperand(0))->getMetadata());
+            cast<MDNode>(
+                cast<MetadataAsValue>(CB.getArgOperand(0))->getMetadata())
+                ->getOperand(0));
         auto [Kind, RegIdx, NumRegs] =
             AMDGPU::parseAsmPhysRegName(RegName->getString());
         return Kind != 'a';
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
index 48884b22c63cf..2ad6e684a7092 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
@@ -704,41 +704,41 @@ define amdgpu_kernel void @align2_align4_virtreg() {
 define amdgpu_kernel void @kernel_uses_write_register_a55() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_a55(
 ; CHECK-SAME: ) #[[ATTR3:[0-9]+]] {
-; CHECK-NEXT:    call void @llvm.write_register.i32(metadata !"a55", i32 0)
+; CHECK-NEXT:    call void @llvm.write_register.i32(metadata [[META0:![0-9]+]], i32 0)
 ; CHECK-NEXT:    ret void
 ;
-  call void @llvm.write_register.i64(metadata !"a55", i32 0)
+  call void @llvm.write_register.i64(metadata !0, i32 0)
   ret void
 }
 
 define amdgpu_kernel void @kernel_uses_write_register_v55() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_v55(
 ; CHECK-SAME: ) #[[ATTR4:[0-9]+]] {
-; CHECK-NEXT:    call void @llvm.write_register.i32(metadata !"v55", i32 0)
+; CHECK-NEXT:    call void @llvm.write_register.i32(metadata [[META1:![0-9]+]], i32 0)
 ; CHECK-NEXT:    ret void
 ;
-  call void @llvm.write_register.i64(metadata !"v55", i32 0)
+  call void @llvm.write_register.i64(metadata !1, i32 0)
   ret void
 }
 
 define amdgpu_kernel void @kernel_uses_write_register_a55_57() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_a55_57(
 ; CHECK-SAME: ) #[[ATTR3]] {
-; CHECK-NEXT:    call void @llvm.write_register.i96(metadata !"a[55:57]", i96 0)
+; CHECK-NEXT:    call void @llvm.write_register.i96(metadata [[META2:![0-9]+]], i96 0)
 ; CHECK-NEXT:    ret void
 ;
-  call void @llvm.write_register.i64(metadata !"a[55:57]", i96 0)
+  call void @llvm.write_register.i64(metadata !2, i96 0)
   ret void
 }
 
 define amdgpu_kernel void @kernel_uses_read_register_a55(ptr addrspace(1) %ptr) {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_register_a55(
 ; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3]] {
-; CHECK-NEXT:    [[REG:%.*]] = call i32 @llvm.read_register.i32(metadata !"a55")
+; CHECK-NEXT:    [[REG:%.*]] = call i32 @llvm.read_register.i32(metadata [[META0]])
 ; CHECK-NEXT:    store i32 [[REG]], ptr addrspace(1) [[PTR]], align 4
 ; CHECK-NEXT:    ret void
 ;
-  %reg = call i32 @llvm.read_register.i64(metadata !"a55")
+  %reg = call i32 @llvm.read_register.i64(metadata !0)
   store i32 %reg, ptr addrspace(1) %ptr
   ret void
 }
@@ -746,11 +746,11 @@ define amdgpu_kernel void @kernel_uses_read_register_a55(ptr addrspace(1) %ptr)
 define amdgpu_kernel void @kernel_uses_read_volatile_register_a55(ptr addrspace(1) %ptr) {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_volatile_register_a55(
 ; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3]] {
-; CHECK-NEXT:    [[REG:%.*]] = call i32 @llvm.read_volatile_register.i32(metadata !"a55")
+; CHECK-NEXT:    [[REG:%.*]] = call i32 @llvm.read_volatile_register.i32(metadata [[META0]])
 ; CHECK-NEXT:    store i32 [[REG]], ptr addrspace(1) [[PTR]], align 4
 ; CHECK-NEXT:    ret void
 ;
-  %reg = call i32 @llvm.read_volatile_register.i64(metadata !"a55")
+  %reg = call i32 @llvm.read_volatile_register.i64(metadata !0)
   store i32 %reg, ptr addrspace(1) %ptr
   ret void
 }
@@ -758,11 +758,11 @@ define amdgpu_kernel void @kernel_uses_read_volatile_register_a55(ptr addrspace(
 define amdgpu_kernel void @kernel_uses_read_register_a56_59(ptr addrspace(1) %ptr) {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_register_a56_59(
 ; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3]] {
-; CHECK-NEXT:    [[REG:%.*]] = call i128 @llvm.read_register.i128(metadata !"a[56:59]")
+; CHECK-NEXT:    [[REG:%.*]] = call i128 @llvm.read_register.i128(metadata [[META3:![0-9]+]])
 ; CHECK-NEXT:    store i128 [[REG]], ptr addrspace(1) [[PTR]], align 8
 ; CHECK-NEXT:    ret void
 ;
-  %reg = call i128 @llvm.read_register.i64(metadata !"a[56:59]")
+  %reg = call i128 @llvm.read_register.i64(metadata !3)
   store i128 %reg, ptr addrspace(1) %ptr
   ret void
 }
@@ -770,14 +770,21 @@ define amdgpu_kernel void @kernel_uses_read_register_a56_59(ptr addrspace(1) %pt
 define amdgpu_kernel void @kernel_uses_write_register_out_of_bounds_a256() {
 ; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_out_of_bounds_a256(
 ; CHECK-SAME: ) #[[ATTR3]] {
-; CHECK-NEXT:    call void @llvm.write_register.i32(metadata !"a256", i32 0)
+; CHECK-NEXT:    call void @llvm.write_register.i32(metadata [[META4:![0-9]+]], i32 0)
 ; CHECK-NEXT:    ret void
 ;
-  call void @llvm.write_register.i64(metadata !"a256", i32 0)
+  call void @llvm.write_register.i64(metadata !4, i32 0)
   ret void
 }
 
 attributes #0 = { "amdgpu-agpr-alloc"="0" }
+
+!0 = !{!"a55"}
+!1 = !{!"v55"}
+!2 = !{!"a[55:57]"}
+!3 = !{!"a[56:59]"}
+!4 = !{!"a256"}
+
 ;.
 ; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
 ; CHECK: attributes #[[ATTR1]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
@@ -791,3 +798,9 @@ attributes #0 = { "amdgpu-agpr-alloc"="0" }
 ; CHECK: attributes #[[ATTR9:[0-9]+]] = { nocallback nounwind "target-cpu"="gfx90a" }
 ; CHECK: attributes #[[ATTR10]] = { "amdgpu-agpr-alloc"="0" }
 ;.
+; CHECK: [[META0]] = !{!"a55"}
+; CHECK: [[META1]] = !{!"v55"}
+; CHECK: [[META2]] = !{!"a[55:57]"}
+; CHECK: [[META3]] = !{!"a[56:59]"}
+; CHECK: [[META4]] = !{!"a256"}
+;.

@arsenm arsenm enabled auto-merge (squash) October 8, 2025 02:43
@arsenm arsenm merged commit 96e0bbc into main Oct 8, 2025
11 of 13 checks passed
@arsenm arsenm deleted the users/arsenm/amdgpu/fix-attributor-read_register-format branch October 8, 2025 03:13
svkeerthy pushed a commit that referenced this pull request Oct 9, 2025
…ster (#162414)

Apparently the IR verifier doesn't enforce the correct structure.
Also I do not know why we have this extra level of wrapper in the
intrinsic,
it just makes it harder to get at the string. I also do not know why
kokkos is using these intrinsics, but it shouldn't.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants