From 37f307784e9743360612a241baaffc86f205f340 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 5 Oct 2025 10:17:18 +0900 Subject: [PATCH] AMDGPU: Account for read/write register intrinsics for AGPR usage Fix the special case intrinsics that can directly reference a physical register. There's no reason to use this. --- llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 19 +++- .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 15 +-- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 7 ++ .../AMDGPU/amdgpu-attributor-no-agpr.ll | 91 ++++++++++++++++++- 4 files changed, 118 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp index a52c79f4302d1..388b106bab3e1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp @@ -1328,10 +1328,23 @@ struct AAAMDGPUNoAGPR : public StateWrapper { return false; } - // Some intrinsics may use AGPRs, but if we have a choice, we are not - // required to use AGPRs. - if (Callee->isIntrinsic()) + switch (Callee->getIntrinsicID()) { + case Intrinsic::not_intrinsic: + break; + case Intrinsic::write_register: + case Intrinsic::read_register: + case Intrinsic::read_volatile_register: { + const MDString *RegName = cast( + cast(CB.getArgOperand(0))->getMetadata()); + auto [Kind, RegIdx, NumRegs] = + AMDGPU::parseAsmPhysRegName(RegName->getString()); + return Kind != 'a'; + } + default: + // Some intrinsics may use AGPRs, but if we have a choice, we are not + // required to use AGPRs. return true; + } // TODO: Handle callsite attributes const auto *CalleeInfo = A.getAAFor( diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index f7f4d46b4d39c..7a9ccd99173c6 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -1569,12 +1569,7 @@ static bool isValidRegPrefix(char C) { return C == 'v' || C == 's' || C == 'a'; } -std::tuple -parseAsmConstraintPhysReg(StringRef Constraint) { - StringRef RegName = Constraint; - if (!RegName.consume_front("{") || !RegName.consume_back("}")) - return {}; - +std::tuple parseAsmPhysRegName(StringRef RegName) { char Kind = RegName.front(); if (!isValidRegPrefix(Kind)) return {}; @@ -1601,6 +1596,14 @@ parseAsmConstraintPhysReg(StringRef Constraint) { return {}; } +std::tuple +parseAsmConstraintPhysReg(StringRef Constraint) { + StringRef RegName = Constraint; + if (!RegName.consume_front("{") || !RegName.consume_back("}")) + return {}; + return parseAsmPhysRegName(RegName); +} + std::pair getIntegerPairAttribute(const Function &F, StringRef Name, std::pair Default, diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h index 2b9c063f42a5e..b656414f2b85c 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h @@ -1013,6 +1013,13 @@ bool isReadOnlySegment(const GlobalValue *GV); /// target triple \p TT, false otherwise. bool shouldEmitConstantsToTextSection(const Triple &TT); +/// Returns a valid charcode or 0 in the first entry if this is a valid physical +/// register name. Followed by the start register number, and the register +/// width. Does not validate the number of registers exists in the class. Unlike +/// parseAsmConstraintPhysReg, this does not expect the name to be wrapped in +/// "{}". +std::tuple parseAsmPhysRegName(StringRef TupleString); + /// Returns a valid charcode or 0 in the first entry if this is a valid physical /// register constraint. Followed by the start register number, and the register /// width. Does not validate the number of registers exists in the class. diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll index b8126caa8b80c..48884b22c63cf 100644 --- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll @@ -181,7 +181,7 @@ define amdgpu_kernel void @kernel_calls_extern() { define amdgpu_kernel void @kernel_calls_extern_marked_callsite() { ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern_marked_callsite( ; CHECK-SAME: ) #[[ATTR1]] { -; CHECK-NEXT: call void @unknown() #[[ATTR5:[0-9]+]] +; CHECK-NEXT: call void @unknown() #[[ATTR10:[0-9]+]] ; CHECK-NEXT: call void @use_most() ; CHECK-NEXT: ret void ; @@ -205,7 +205,7 @@ define amdgpu_kernel void @kernel_calls_indirect(ptr %indirect) { define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(ptr %indirect) { ; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect_marked_callsite( ; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR5]] +; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR10]] ; CHECK-NEXT: call void @use_most() ; CHECK-NEXT: ret void ; @@ -701,12 +701,93 @@ define amdgpu_kernel void @align2_align4_virtreg() { ret void } +define amdgpu_kernel void @kernel_uses_write_register_a55() { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_a55( +; CHECK-SAME: ) #[[ATTR3:[0-9]+]] { +; CHECK-NEXT: call void @llvm.write_register.i32(metadata !"a55", i32 0) +; CHECK-NEXT: ret void +; + call void @llvm.write_register.i64(metadata !"a55", i32 0) + ret void +} + +define amdgpu_kernel void @kernel_uses_write_register_v55() { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_v55( +; CHECK-SAME: ) #[[ATTR4:[0-9]+]] { +; CHECK-NEXT: call void @llvm.write_register.i32(metadata !"v55", i32 0) +; CHECK-NEXT: ret void +; + call void @llvm.write_register.i64(metadata !"v55", i32 0) + ret void +} + +define amdgpu_kernel void @kernel_uses_write_register_a55_57() { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_a55_57( +; CHECK-SAME: ) #[[ATTR3]] { +; CHECK-NEXT: call void @llvm.write_register.i96(metadata !"a[55:57]", i96 0) +; CHECK-NEXT: ret void +; + call void @llvm.write_register.i64(metadata !"a[55:57]", i96 0) + ret void +} + +define amdgpu_kernel void @kernel_uses_read_register_a55(ptr addrspace(1) %ptr) { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_register_a55( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: [[REG:%.*]] = call i32 @llvm.read_register.i32(metadata !"a55") +; CHECK-NEXT: store i32 [[REG]], ptr addrspace(1) [[PTR]], align 4 +; CHECK-NEXT: ret void +; + %reg = call i32 @llvm.read_register.i64(metadata !"a55") + store i32 %reg, ptr addrspace(1) %ptr + ret void +} + +define amdgpu_kernel void @kernel_uses_read_volatile_register_a55(ptr addrspace(1) %ptr) { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_volatile_register_a55( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: [[REG:%.*]] = call i32 @llvm.read_volatile_register.i32(metadata !"a55") +; CHECK-NEXT: store i32 [[REG]], ptr addrspace(1) [[PTR]], align 4 +; CHECK-NEXT: ret void +; + %reg = call i32 @llvm.read_volatile_register.i64(metadata !"a55") + store i32 %reg, ptr addrspace(1) %ptr + ret void +} + +define amdgpu_kernel void @kernel_uses_read_register_a56_59(ptr addrspace(1) %ptr) { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_register_a56_59( +; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3]] { +; CHECK-NEXT: [[REG:%.*]] = call i128 @llvm.read_register.i128(metadata !"a[56:59]") +; CHECK-NEXT: store i128 [[REG]], ptr addrspace(1) [[PTR]], align 8 +; CHECK-NEXT: ret void +; + %reg = call i128 @llvm.read_register.i64(metadata !"a[56:59]") + store i128 %reg, ptr addrspace(1) %ptr + ret void +} + +define amdgpu_kernel void @kernel_uses_write_register_out_of_bounds_a256() { +; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_out_of_bounds_a256( +; CHECK-SAME: ) #[[ATTR3]] { +; CHECK-NEXT: call void @llvm.write_register.i32(metadata !"a256", i32 0) +; CHECK-NEXT: ret void +; + call void @llvm.write_register.i64(metadata !"a256", i32 0) + ret void +} + attributes #0 = { "amdgpu-agpr-alloc"="0" } ;. ; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR1]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" } ; CHECK: attributes #[[ATTR2:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" } -; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" } -; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" } -; CHECK: attributes #[[ATTR5]] = { "amdgpu-agpr-alloc"="0" } +; CHECK: attributes #[[ATTR3]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR4]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" } +; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" } +; CHECK: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" } +; CHECK: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(read) "target-cpu"="gfx90a" } +; CHECK: attributes #[[ATTR8:[0-9]+]] = { nounwind "target-cpu"="gfx90a" } +; CHECK: attributes #[[ATTR9:[0-9]+]] = { nocallback nounwind "target-cpu"="gfx90a" } +; CHECK: attributes #[[ATTR10]] = { "amdgpu-agpr-alloc"="0" } ;.