From 987b9e121700fb578db4cbb8112f932628c9fa42 Mon Sep 17 00:00:00 2001 From: Helena Kotas Date: Thu, 18 Sep 2025 10:22:06 -0700 Subject: [PATCH 1/2] [DirectX] NonUniformResourceIndex lowering Introduces `llvm.{dx|svp}.resource.nonuniformindex` intrinsic that will be used when a resource index is not guaranteed to be uniform across threads (HLSL function NonUniformResourceIndex). The DXIL lowering layer looks for this intrinsic call in the resource index calculation, makes sure it is reflected in the NonUniform flag on DXIL create handle ops (`dx.op.createHandle` and `dx.op.createHandleFromBinding`), and then removes it from the module. Closes #155701 --- llvm/include/llvm/IR/IntrinsicsDirectX.td | 4 ++ llvm/include/llvm/IR/IntrinsicsSPIRV.td | 3 + llvm/lib/Target/DirectX/DXILOpLowering.cpp | 68 ++++++++++++++++--- .../test/CodeGen/DirectX/CreateHandle-NURI.ll | 44 ++++++++++++ .../DirectX/CreateHandleFromBinding-NURI.ll | 46 +++++++++++++ 5 files changed, 155 insertions(+), 10 deletions(-) create mode 100644 llvm/test/CodeGen/DirectX/CreateHandle-NURI.ll create mode 100644 llvm/test/CodeGen/DirectX/CreateHandleFromBinding-NURI.ll diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td index 5d76c3f8df89d..65f199bbe90dc 100644 --- a/llvm/include/llvm/IR/IntrinsicsDirectX.td +++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td @@ -39,6 +39,10 @@ def int_dx_resource_handlefromimplicitbinding def int_dx_resource_getpointer : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [llvm_any_ty, llvm_i32_ty], [IntrNoMem]>; + +def int_dx_resource_nonuniformindex + : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; + def int_dx_resource_load_typedbuffer : DefaultAttrsIntrinsic<[llvm_any_ty, llvm_i1_ty], [llvm_any_ty, llvm_i32_ty], [IntrReadMem]>; diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td index bc026fa33c769..0b8258be8e1f3 100644 --- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td +++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td @@ -160,6 +160,9 @@ def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty] : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [llvm_any_ty, llvm_i32_ty], [IntrNoMem]>; +def int_spv_resource_nonuniformindex + : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; + // Read a value from the image buffer. It does not translate directly to a // single OpImageRead because the result type is not necessarily a 4 element // vector. diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp index 577b4624458b9..610d8b63bba27 100644 --- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp +++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp @@ -16,6 +16,7 @@ #include "llvm/Analysis/DXILMetadataAnalysis.h" #include "llvm/Analysis/DXILResource.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" @@ -24,6 +25,7 @@ #include "llvm/IR/IntrinsicsDirectX.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" +#include "llvm/IR/Use.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/ErrorHandling.h" @@ -42,6 +44,7 @@ class OpLowerer { DXILResourceTypeMap &DRTM; const ModuleMetadataInfo &MMDI; SmallVector CleanupCasts; + Function *CleanupNURI = nullptr; public: OpLowerer(Module &M, DXILResourceMap &DRM, DXILResourceTypeMap &DRTM, @@ -195,6 +198,21 @@ class OpLowerer { CleanupCasts.clear(); } + void cleanupNonUniformResourceIndexCalls() { + // Replace all NonUniformResourceIndex calls with their argument. + if (!CleanupNURI) + return; + for (User *U : make_early_inc_range(CleanupNURI->users())) { + CallInst *CI = dyn_cast(U); + if (!CI) + continue; + CI->replaceAllUsesWith(CI->getArgOperand(0)); + CI->eraseFromParent(); + } + CleanupNURI->eraseFromParent(); + CleanupNURI = nullptr; + } + // Remove the resource global associated with the handleFromBinding call // instruction and their uses as they aren't needed anymore. // TODO: We should verify that all the globals get removed. @@ -229,6 +247,31 @@ class OpLowerer { NameGlobal->removeFromParent(); } + bool hasNonUniformIndex(Value *IndexOp) { + if (isa(IndexOp)) + return false; + + SmallVector WorkList; + WorkList.push_back(IndexOp); + + while (!WorkList.empty()) { + Value *V = WorkList.pop_back_val(); + if (auto *CI = dyn_cast(V)) { + if (CI->getCalledFunction()->getIntrinsicID() == + Intrinsic::dx_resource_nonuniformindex) + return true; + } + if (auto *U = llvm::dyn_cast(V)) { + for (llvm::Value *Op : U->operands()) { + if (isa(Op)) + continue; + WorkList.push_back(Op); + } + } + } + return false; + } + [[nodiscard]] bool lowerToCreateHandle(Function &F) { IRBuilder<> &IRB = OpBuilder.getIRB(); Type *Int8Ty = IRB.getInt8Ty(); @@ -250,13 +293,12 @@ class OpLowerer { IndexOp = IRB.CreateAdd(IndexOp, ConstantInt::get(Int32Ty, Binding.LowerBound)); - // FIXME: The last argument is a NonUniform flag which needs to be set - // based on resource analysis. - // https://github.com/llvm/llvm-project/issues/155701 + bool HasNonUniformIndex = + (Binding.Size == 1) ? false : hasNonUniformIndex(IndexOp); std::array Args{ ConstantInt::get(Int8Ty, llvm::to_underlying(RC)), ConstantInt::get(Int32Ty, Binding.RecordID), IndexOp, - ConstantInt::get(Int1Ty, false)}; + ConstantInt::get(Int1Ty, HasNonUniformIndex)}; Expected OpCall = OpBuilder.tryCreateOp(OpCode::CreateHandle, Args, CI->getName()); if (Error E = OpCall.takeError()) @@ -300,11 +342,10 @@ class OpLowerer { : Binding.LowerBound + Binding.Size - 1; Constant *ResBind = OpBuilder.getResBind(Binding.LowerBound, UpperBound, Binding.Space, RC); - // FIXME: The last argument is a NonUniform flag which needs to be set - // based on resource analysis. - // https://github.com/llvm/llvm-project/issues/155701 - Constant *NonUniform = ConstantInt::get(Int1Ty, false); - std::array BindArgs{ResBind, IndexOp, NonUniform}; + bool NonUniformIndex = + (Binding.Size == 1) ? false : hasNonUniformIndex(IndexOp); + Constant *NonUniformOp = ConstantInt::get(Int1Ty, NonUniformIndex); + std::array BindArgs{ResBind, IndexOp, NonUniformOp}; Expected OpBind = OpBuilder.tryCreateOp( OpCode::CreateHandleFromBinding, BindArgs, CI->getName()); if (Error E = OpBind.takeError()) @@ -868,6 +909,11 @@ class OpLowerer { case Intrinsic::dx_resource_getpointer: HasErrors |= lowerGetPointer(F); break; + case Intrinsic::dx_resource_nonuniformindex: + assert(!CleanupNURI && + "overloaded llvm.dx.resource.nonuniformindex intrinsics?"); + CleanupNURI = &F; + break; case Intrinsic::dx_resource_load_typedbuffer: HasErrors |= lowerTypedBufferLoad(F, /*HasCheckBit=*/true); break; @@ -908,8 +954,10 @@ class OpLowerer { } Updated = true; } - if (Updated && !HasErrors) + if (Updated && !HasErrors) { cleanupHandleCasts(); + cleanupNonUniformResourceIndexCalls(); + } return Updated; } diff --git a/llvm/test/CodeGen/DirectX/CreateHandle-NURI.ll b/llvm/test/CodeGen/DirectX/CreateHandle-NURI.ll new file mode 100644 index 0000000000000..f981be6570cf8 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/CreateHandle-NURI.ll @@ -0,0 +1,44 @@ +; RUN: opt -S -passes=dxil-op-lower %s | FileCheck %s + +target triple = "dxil-pc-shadermodel6.0-compute" + +@A.str = internal unnamed_addr constant [2 x i8] c"A\00", align 1 + +declare i32 @some_val(); + +define void @test_buffers_with_nuri() { + + %val = call i32 @some_val() + + ; RWBuffer A[10]; + ; + ; A[NonUniformResourceIndex(val)]; + + %nuri1 = tail call noundef i32 @llvm.dx.resource.nonuniformindex(i32 %val) + %res1 = call target("dx.TypedBuffer", float, 1, 0, 0) + @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 10, i32 %nuri1, ptr @A.str) + ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 %val, i1 true) #[[ATTR:.*]] + ; CHECK-NOT: @llvm.dx.cast.handle + ; CHECK-NOT: @llvm.dx.resource.nonuniformindex + + ; A[NonUniformResourceIndex(val + 1) % 10]; + %add1 = add i32 %val, 1 + %nuri2 = tail call noundef i32 @llvm.dx.resource.nonuniformindex(i32 %add1) + %rem1 = urem i32 %nuri2, 10 + %res2 = call target("dx.TypedBuffer", float, 1, 0, 0) + @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 10, i32 %rem1, ptr @A.str) + ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 %rem1, i1 true) #[[ATTR]] + + ; A[10 + 3 * NonUniformResourceIndex(GI)]; + %mul1 = mul i32 %nuri1, 3 + %add2 = add i32 %mul1, 10 + %res3 = call target("dx.TypedBuffer", float, 1, 0, 0) + @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 10, i32 %add2, ptr @A.str) + ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 %add2, i1 true) #[[ATTR]] + + ret void +} + +; CHECK: attributes #[[ATTR]] = {{{.*}} memory(read) {{.*}}} + +attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) } diff --git a/llvm/test/CodeGen/DirectX/CreateHandleFromBinding-NURI.ll b/llvm/test/CodeGen/DirectX/CreateHandleFromBinding-NURI.ll new file mode 100644 index 0000000000000..6d37652acccf0 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/CreateHandleFromBinding-NURI.ll @@ -0,0 +1,46 @@ +; RUN: opt -S -passes=dxil-op-lower %s | FileCheck %s + +target triple = "dxil-pc-shadermodel6.6-compute" + +@A.str = internal unnamed_addr constant [2 x i8] c"A\00", align 1 + +declare i32 @some_val(); + +define void @test_buffers_with_nuri() { + + %val = call i32 @some_val() + + ; RWBuffer A[10]; + ; + ; A[NonUniformResourceIndex(val)]; + + %nuri1 = tail call noundef i32 @llvm.dx.resource.nonuniformindex(i32 %val) + %res1 = call target("dx.TypedBuffer", float, 1, 0, 0) + @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 10, i32 %nuri1, ptr @A.str) + ; CHECK: %[[RES1:.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 9, i32 0, i8 1 }, i32 %val, i1 true) #[[ATTR:.*]] + ; CHECK: call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %[[RES1]], %dx.types.ResourceProperties { i32 4106, i32 265 }) #[[ATTR]] + ; CHECK-NOT: @llvm.dx.cast.handle + ; CHECK-NOT: @llvm.dx.resource.nonuniformindex + + ; A[NonUniformResourceIndex(val + 1) % 10]; + %add1 = add i32 %val, 1 + %nuri2 = tail call noundef i32 @llvm.dx.resource.nonuniformindex(i32 %add1) + %rem1 = urem i32 %nuri2, 10 + %res2 = call target("dx.TypedBuffer", float, 1, 0, 0) + @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 10, i32 %rem1, ptr @A.str) + ; CHECK: %[[RES2:.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 9, i32 0, i8 1 }, i32 %rem1, i1 true) #[[ATTR]] + ; CHECK: call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %[[RES2]], %dx.types.ResourceProperties { i32 4106, i32 265 }) #[[ATTR]] + + ; A[10 + 3 * NonUniformResourceIndex(GI)]; + %mul1 = mul i32 %nuri1, 3 + %add2 = add i32 %mul1, 10 + %res3 = call target("dx.TypedBuffer", float, 1, 0, 0) + @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 10, i32 %add2, ptr @A.str) + ; CHECK: %[[RES3:.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 9, i32 0, i8 1 }, i32 %add2, i1 true) #[[ATTR]] + ; CHECK: %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %[[RES3]], %dx.types.ResourceProperties { i32 4106, i32 265 }) #[[ATTR]] + ret void +} + +; CHECK: attributes #[[ATTR]] = {{{.*}} memory(none) {{.*}}} + +attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) } From d8329ec06246bb262822821e7494538007d01478 Mon Sep 17 00:00:00 2001 From: Helena Kotas Date: Mon, 22 Sep 2025 20:19:10 -0700 Subject: [PATCH 2/2] Add more test Add tests for NonUniformResourceIndex on: - value that goes through store & load - handle initialization of a single resource - value not related to resources --- .../test/CodeGen/DirectX/CreateHandle-NURI.ll | 30 ++++++++++++++++-- .../DirectX/CreateHandleFromBinding-NURI.ll | 31 +++++++++++++++++++ 2 files changed, 59 insertions(+), 2 deletions(-) diff --git a/llvm/test/CodeGen/DirectX/CreateHandle-NURI.ll b/llvm/test/CodeGen/DirectX/CreateHandle-NURI.ll index f981be6570cf8..cfa6c983df3f4 100644 --- a/llvm/test/CodeGen/DirectX/CreateHandle-NURI.ll +++ b/llvm/test/CodeGen/DirectX/CreateHandle-NURI.ll @@ -3,19 +3,21 @@ target triple = "dxil-pc-shadermodel6.0-compute" @A.str = internal unnamed_addr constant [2 x i8] c"A\00", align 1 +@B.str = internal unnamed_addr constant [2 x i8] c"A\00", align 1 declare i32 @some_val(); define void @test_buffers_with_nuri() { %val = call i32 @some_val() + %foo = alloca i32, align 4 ; RWBuffer A[10]; ; ; A[NonUniformResourceIndex(val)]; %nuri1 = tail call noundef i32 @llvm.dx.resource.nonuniformindex(i32 %val) - %res1 = call target("dx.TypedBuffer", float, 1, 0, 0) + %res1 = call target("dx.TypedBuffer", float, 1, 0, 0) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 10, i32 %nuri1, ptr @A.str) ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 %val, i1 true) #[[ATTR:.*]] ; CHECK-NOT: @llvm.dx.cast.handle @@ -25,7 +27,7 @@ define void @test_buffers_with_nuri() { %add1 = add i32 %val, 1 %nuri2 = tail call noundef i32 @llvm.dx.resource.nonuniformindex(i32 %add1) %rem1 = urem i32 %nuri2, 10 - %res2 = call target("dx.TypedBuffer", float, 1, 0, 0) + %res2 = call target("dx.TypedBuffer", float, 1, 0, 0) @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 10, i32 %rem1, ptr @A.str) ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 %rem1, i1 true) #[[ATTR]] @@ -36,6 +38,30 @@ define void @test_buffers_with_nuri() { @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 10, i32 %add2, ptr @A.str) ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 %add2, i1 true) #[[ATTR]] + ; NonUniformResourceIndex value going through store & load - the flag is not going to get picked up: + %a = tail call noundef i32 @llvm.dx.resource.nonuniformindex(i32 %val) + store i32 %a, ptr %foo + %b = load i32, ptr %foo + %res4 = call target("dx.TypedBuffer", float, 1, 0, 0) + @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 10, i32 %b, ptr @A.str) + ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 %b, i1 false) #[[ATTR]] + + ; NonUniformResourceIndex index value on a single resouce (not an array) - the flag is not going to get picked up: + ; + ; RWBuffer B : register(u20); + ; B[NonUniformResourceIndex(val)]; + %nuri3 = tail call noundef i32 @llvm.dx.resource.nonuniformindex(i32 %val) + %res5 = call target("dx.TypedBuffer", float, 1, 0, 0) + @llvm.dx.resource.handlefrombinding(i32 20, i32 0, i32 1, i32 %nuri1, ptr @B.str) + ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 1, i32 %val, i1 false) #[[ATTR]] + + ; NonUniformResourceIndex on unrelated value - the call is removed: + ; foo = NonUniformResourceIndex(val); + %nuri4 = tail call noundef i32 @llvm.dx.resource.nonuniformindex(i32 %val) + store i32 %nuri4, ptr %foo + ; CHECK: store i32 %val, ptr %foo + ; CHECK-NOT: @llvm.dx.resource.nonuniformindex + ret void } diff --git a/llvm/test/CodeGen/DirectX/CreateHandleFromBinding-NURI.ll b/llvm/test/CodeGen/DirectX/CreateHandleFromBinding-NURI.ll index 6d37652acccf0..80bf5a6a67c91 100644 --- a/llvm/test/CodeGen/DirectX/CreateHandleFromBinding-NURI.ll +++ b/llvm/test/CodeGen/DirectX/CreateHandleFromBinding-NURI.ll @@ -3,12 +3,14 @@ target triple = "dxil-pc-shadermodel6.6-compute" @A.str = internal unnamed_addr constant [2 x i8] c"A\00", align 1 +@B.str = internal unnamed_addr constant [2 x i8] c"A\00", align 1 declare i32 @some_val(); define void @test_buffers_with_nuri() { %val = call i32 @some_val() + %foo = alloca i32, align 4 ; RWBuffer A[10]; ; @@ -39,6 +41,35 @@ define void @test_buffers_with_nuri() { ; CHECK: %[[RES3:.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 9, i32 0, i8 1 }, i32 %add2, i1 true) #[[ATTR]] ; CHECK: %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %[[RES3]], %dx.types.ResourceProperties { i32 4106, i32 265 }) #[[ATTR]] ret void + + ; NonUniformResourceIndex value going through store & load: the flag is not going to get picked up + %a = tail call noundef i32 @llvm.dx.resource.nonuniformindex(i32 %val) + store i32 %a, ptr %foo + %b = load i32, ptr %foo + %res4 = call target("dx.TypedBuffer", float, 1, 0, 0) + @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 10, i32 %b, ptr @A.str) + ; CHECK: %[[RES4:.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 9, i32 0, i8 1 }, i32 %b, i1 false) #[[ATTR]] + ; CHECK: %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %[[RES4]], %dx.types.ResourceProperties { i32 4106, i32 265 }) #[[ATTR]] + + ; NonUniformResourceIndex index value on a single resouce (not an array): the flag is not going to get picked up + ; RWBuffer B : register(u20); + ; + ; B[NonUniformResourceIndex(val)]; + + %nuri3 = tail call noundef i32 @llvm.dx.resource.nonuniformindex(i32 %val) + %res5 = call target("dx.TypedBuffer", float, 1, 0, 0) + @llvm.dx.resource.handlefrombinding(i32 20, i32 0, i32 1, i32 %nuri1, ptr @B.str) + ; CHECK: %[[RES4:.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 0, i32 20, i8 1 }, i32 %val, i1 false) #[[ATTR]] + ; CHECK: %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %[[RES4]], %dx.types.ResourceProperties { i32 4106, i32 265 }) #[[ATTR]] + + ; NonUniformResourceIndex on unrelated value - the call is removed: + ; foo = NonUniformResourceIndex(val); + %nuri4 = tail call noundef i32 @llvm.dx.resource.nonuniformindex(i32 %val) + store i32 %nuri4, ptr %foo + ; CHECK: store i32 %val, ptr %foo + ; CHECK-NOT: @llvm.dx.resource.nonuniformindex + + ret void } ; CHECK: attributes #[[ATTR]] = {{{.*}} memory(none) {{.*}}}