diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index 5d76c3f8df89d..65f199bbe90dc 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -39,6 +39,12 @@ def int_dx_resource_handlefromimplicitbinding
 def int_dx_resource_getpointer
     : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [llvm_any_ty, llvm_i32_ty],
                             [IntrNoMem]>;
+
+// Marks an i32 resource index as non-uniform. DXIL op lowering sets the
+// NonUniform flag on handle creation and replaces the call with its operand.
+def int_dx_resource_nonuniformindex
+    : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
+
 def int_dx_resource_load_typedbuffer
     : DefaultAttrsIntrinsic<[llvm_any_ty, llvm_i1_ty],
                             [llvm_any_ty, llvm_i32_ty], [IntrReadMem]>;
diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
index bc026fa33c769..0b8258be8e1f3 100644
--- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td
+++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
@@ -160,6 +160,11 @@ def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]
       : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [llvm_any_ty, llvm_i32_ty],
                               [IntrNoMem]>;
 
+  // Marks an i32 resource index as non-uniform; same signature as
+  // llvm.dx.resource.nonuniformindex.
+  def int_spv_resource_nonuniformindex
+      : DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
+
   // Read a value from the image buffer. It does not translate directly to a
   // single OpImageRead because the result type is not necessarily a 4 element
   // vector.
diff --git a/llvm/lib/Target/DirectX/DXILOpLowering.cpp b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
index 577b4624458b9..610d8b63bba27 100644
--- a/llvm/lib/Target/DirectX/DXILOpLowering.cpp
+++ b/llvm/lib/Target/DirectX/DXILOpLowering.cpp
@@ -16,6 +16,7 @@
 #include "llvm/Analysis/DXILMetadataAnalysis.h"
 #include "llvm/Analysis/DXILResource.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Constant.h"
 #include "llvm/IR/DiagnosticInfo.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instruction.h"
@@ -24,6 +25,7 @@
 #include "llvm/IR/IntrinsicsDirectX.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/PassManager.h"
+#include "llvm/IR/Use.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -42,6 +44,7 @@ class OpLowerer {
   DXILResourceTypeMap &DRTM;
   const ModuleMetadataInfo &MMDI;
   SmallVector<CallInst *> CleanupCasts;
+  Function *CleanupNURI = nullptr;
 
 public:
   OpLowerer(Module &M, DXILResourceMap &DRM, DXILResourceTypeMap &DRTM,
@@ -195,6 +198,21 @@ class OpLowerer {
     CleanupCasts.clear();
   }
 
+  void cleanupNonUniformResourceIndexCalls() {
+    // Replace all NonUniformResourceIndex calls with their argument.
+    if (!CleanupNURI)
+      return;
+    for (User *U : make_early_inc_range(CleanupNURI->users())) {
+      CallInst *CI = dyn_cast<CallInst>(U);
+      if (!CI)
+        continue;
+      CI->replaceAllUsesWith(CI->getArgOperand(0));
+      CI->eraseFromParent();
+    }
+    CleanupNURI->eraseFromParent();
+    CleanupNURI = nullptr;
+  }
+
   // Remove the resource global associated with the handleFromBinding call
   // instruction and their uses as they aren't needed anymore.
   // TODO: We should verify that all the globals get removed.
@@ -229,6 +247,39 @@ class OpLowerer {
     NameGlobal->removeFromParent();
   }
 
+  // Returns true if IndexOp is derived from the result of a
+  // llvm.dx.resource.nonuniformindex call. Only SSA operands are followed, so
+  // a value that round-trips through memory is not recognized.
+  bool hasNonUniformIndex(Value *IndexOp) {
+    if (isa<llvm::Constant>(IndexOp))
+      return false;
+
+    SmallVector<Value *> WorkList;
+    // Guards against PHI cycles and re-walking shared subexpressions.
+    SmallPtrSet<Value *, 16> Visited;
+    WorkList.push_back(IndexOp);
+
+    while (!WorkList.empty()) {
+      Value *V = WorkList.pop_back_val();
+      if (!Visited.insert(V).second)
+        continue;
+      if (auto *CI = dyn_cast<CallInst>(V)) {
+        // CallBase::getIntrinsicID() is safe for indirect calls, where
+        // getCalledFunction() returns null.
+        if (CI->getIntrinsicID() == Intrinsic::dx_resource_nonuniformindex)
+          return true;
+      }
+      if (auto *U = llvm::dyn_cast<llvm::User>(V)) {
+        for (llvm::Value *Op : U->operands()) {
+          if (isa<llvm::Constant>(Op))
+            continue;
+          WorkList.push_back(Op);
+        }
+      }
+    }
+    return false;
+  }
+
   [[nodiscard]] bool lowerToCreateHandle(Function &F) {
     IRBuilder<> &IRB = OpBuilder.getIRB();
     Type *Int8Ty = IRB.getInt8Ty();
@@ -250,13 +301,12 @@ class OpLowerer {
         IndexOp = IRB.CreateAdd(IndexOp,
                                 ConstantInt::get(Int32Ty, Binding.LowerBound));
 
-      // FIXME: The last argument is a NonUniform flag which needs to be set
-      // based on resource analysis.
-      // https://github.com/llvm/llvm-project/issues/155701
+      bool HasNonUniformIndex =
+          Binding.Size != 1 && hasNonUniformIndex(IndexOp);
       std::array<Value *, 4> Args{
           ConstantInt::get(Int8Ty, llvm::to_underlying(RC)),
           ConstantInt::get(Int32Ty, Binding.RecordID), IndexOp,
-          ConstantInt::get(Int1Ty, false)};
+          ConstantInt::get(Int1Ty, HasNonUniformIndex)};
       Expected<CallInst *> OpCall =
           OpBuilder.tryCreateOp(OpCode::CreateHandle, Args, CI->getName());
       if (Error E = OpCall.takeError())
@@ -300,11 +350,10 @@ class OpLowerer {
                                 : Binding.LowerBound + Binding.Size - 1;
       Constant *ResBind = OpBuilder.getResBind(Binding.LowerBound, UpperBound,
                                                Binding.Space, RC);
-      // FIXME: The last argument is a NonUniform flag which needs to be set
-      // based on resource analysis.
-      // https://github.com/llvm/llvm-project/issues/155701
-      Constant *NonUniform = ConstantInt::get(Int1Ty, false);
-      std::array<Value *, 3> BindArgs{ResBind, IndexOp, NonUniform};
+      bool HasNonUniformIndex =
+          Binding.Size != 1 && hasNonUniformIndex(IndexOp);
+      Constant *NonUniformOp = ConstantInt::get(Int1Ty, HasNonUniformIndex);
+      std::array<Value *, 3> BindArgs{ResBind, IndexOp, NonUniformOp};
       Expected<CallInst *> OpBind = OpBuilder.tryCreateOp(
           OpCode::CreateHandleFromBinding, BindArgs, CI->getName());
       if (Error E = OpBind.takeError())
@@ -868,6 +917,11 @@ class OpLowerer {
       case Intrinsic::dx_resource_getpointer:
         HasErrors |= lowerGetPointer(F);
         break;
+      case Intrinsic::dx_resource_nonuniformindex:
+        assert(!CleanupNURI &&
+               "overloaded llvm.dx.resource.nonuniformindex intrinsics?");
+        CleanupNURI = &F;
+        break;
       case Intrinsic::dx_resource_load_typedbuffer:
         HasErrors |= lowerTypedBufferLoad(F, /*HasCheckBit=*/true);
         break;
@@ -908,8 +962,10 @@ class OpLowerer {
       }
       Updated = true;
     }
-    if (Updated && !HasErrors)
+    if (Updated && !HasErrors) {
       cleanupHandleCasts();
+      cleanupNonUniformResourceIndexCalls();
+    }
 
     return Updated;
   }
diff --git a/llvm/test/CodeGen/DirectX/CreateHandle-NURI.ll b/llvm/test/CodeGen/DirectX/CreateHandle-NURI.ll
new file mode 100644
index 0000000000000..cfa6c983df3f4
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/CreateHandle-NURI.ll
@@ -0,0 +1,70 @@
+; RUN: opt -S -passes=dxil-op-lower %s | FileCheck %s
+
+target triple = "dxil-pc-shadermodel6.0-compute"
+
+@A.str = internal unnamed_addr constant [2 x i8] c"A\00", align 1
+@B.str = internal unnamed_addr constant [2 x i8] c"B\00", align 1
+
+declare i32 @some_val();
+
+define void @test_buffers_with_nuri() {
+
+  %val = call i32 @some_val()
+  %foo = alloca i32, align 4
+
+  ; RWBuffer<float> A[10];
+  ;
+  ; A[NonUniformResourceIndex(val)];
+
+  %nuri1 = tail call noundef i32 @llvm.dx.resource.nonuniformindex(i32 %val)
+  %res1 = call target("dx.TypedBuffer", float, 1, 0, 0)
+            @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 10, i32 %nuri1, ptr @A.str)
+  ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 %val, i1 true) #[[ATTR:.*]]
+  ; CHECK-NOT: @llvm.dx.cast.handle
+  ; CHECK-NOT: @llvm.dx.resource.nonuniformindex
+
+  ; A[NonUniformResourceIndex(val + 1) % 10];
+  %add1 = add i32 %val, 1
+  %nuri2 = tail call noundef i32 @llvm.dx.resource.nonuniformindex(i32 %add1)
+  %rem1 = urem i32 %nuri2, 10
+  %res2 = call target("dx.TypedBuffer", float, 1, 0, 0)
+            @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 10, i32 %rem1, ptr @A.str)
+  ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 %rem1, i1 true) #[[ATTR]]
+
+  ; A[10 + 3 * NonUniformResourceIndex(val)];
+  %mul1 = mul i32 %nuri1, 3
+  %add2 = add i32 %mul1, 10
+  %res3 = call target("dx.TypedBuffer", float, 1, 0, 0)
+            @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 10, i32 %add2, ptr @A.str)
+  ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 %add2, i1 true) #[[ATTR]]
+
+  ; NonUniformResourceIndex value going through store & load - the flag is not going to get picked up:
+  %a = tail call noundef i32 @llvm.dx.resource.nonuniformindex(i32 %val)
+  store i32 %a, ptr %foo
+  %b = load i32, ptr %foo
+  %res4 = call target("dx.TypedBuffer", float, 1, 0, 0)
+            @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 10, i32 %b, ptr @A.str)
+  ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 %b, i1 false) #[[ATTR]]
+
+  ; NonUniformResourceIndex index value on a single resource (not an array) - the flag is not going to get picked up:
+  ;
+  ; RWBuffer<float> B : register(u0, space20);
+  ; B[NonUniformResourceIndex(val)];
+  %nuri3 = tail call noundef i32 @llvm.dx.resource.nonuniformindex(i32 %val)
+  %res5 = call target("dx.TypedBuffer", float, 1, 0, 0)
+            @llvm.dx.resource.handlefrombinding(i32 20, i32 0, i32 1, i32 %nuri3, ptr @B.str)
+  ; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 1, i32 %val, i1 false) #[[ATTR]]
+
+  ; NonUniformResourceIndex on unrelated value - the call is removed:
+  ; foo = NonUniformResourceIndex(val);
+  %nuri4 = tail call noundef i32 @llvm.dx.resource.nonuniformindex(i32 %val)
+  store i32 %nuri4, ptr %foo
+  ; CHECK: store i32 %val, ptr %foo
+  ; CHECK-NOT: @llvm.dx.resource.nonuniformindex
+
+  ret void
+}
+
+; CHECK: attributes #[[ATTR]] = {{{.*}} memory(read) {{.*}}}
+
+attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) }
diff --git a/llvm/test/CodeGen/DirectX/CreateHandleFromBinding-NURI.ll b/llvm/test/CodeGen/DirectX/CreateHandleFromBinding-NURI.ll
new file mode 100644
index 0000000000000..80bf5a6a67c91
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/CreateHandleFromBinding-NURI.ll
@@ -0,0 +1,76 @@
+; RUN: opt -S -passes=dxil-op-lower %s | FileCheck %s
+
+target triple = "dxil-pc-shadermodel6.6-compute"
+
+@A.str = internal unnamed_addr constant [2 x i8] c"A\00", align 1
+@B.str = internal unnamed_addr constant [2 x i8] c"B\00", align 1
+
+declare i32 @some_val();
+
+define void @test_buffers_with_nuri() {
+
+  %val = call i32 @some_val()
+  %foo = alloca i32, align 4
+
+  ; RWBuffer<float> A[10];
+  ;
+  ; A[NonUniformResourceIndex(val)];
+
+  %nuri1 = tail call noundef i32 @llvm.dx.resource.nonuniformindex(i32 %val)
+  %res1 = call target("dx.TypedBuffer", float, 1, 0, 0)
+            @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 10, i32 %nuri1, ptr @A.str)
+  ; CHECK: %[[RES1:.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 9, i32 0, i8 1 }, i32 %val, i1 true) #[[ATTR:.*]]
+  ; CHECK: call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %[[RES1]], %dx.types.ResourceProperties { i32 4106, i32 265 }) #[[ATTR]]
+  ; CHECK-NOT: @llvm.dx.cast.handle
+  ; CHECK-NOT: @llvm.dx.resource.nonuniformindex
+
+  ; A[NonUniformResourceIndex(val + 1) % 10];
+  %add1 = add i32 %val, 1
+  %nuri2 = tail call noundef i32 @llvm.dx.resource.nonuniformindex(i32 %add1)
+  %rem1 = urem i32 %nuri2, 10
+  %res2 = call target("dx.TypedBuffer", float, 1, 0, 0)
+            @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 10, i32 %rem1, ptr @A.str)
+  ; CHECK: %[[RES2:.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 9, i32 0, i8 1 }, i32 %rem1, i1 true) #[[ATTR]]
+  ; CHECK: call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %[[RES2]], %dx.types.ResourceProperties { i32 4106, i32 265 }) #[[ATTR]]
+
+  ; A[10 + 3 * NonUniformResourceIndex(val)];
+  %mul1 = mul i32 %nuri1, 3
+  %add2 = add i32 %mul1, 10
+  %res3 = call target("dx.TypedBuffer", float, 1, 0, 0)
+            @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 10, i32 %add2, ptr @A.str)
+  ; CHECK: %[[RES3:.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 9, i32 0, i8 1 }, i32 %add2, i1 true) #[[ATTR]]
+  ; CHECK: call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %[[RES3]], %dx.types.ResourceProperties { i32 4106, i32 265 }) #[[ATTR]]
+
+  ; NonUniformResourceIndex value going through store & load: the flag is not going to get picked up
+  %a = tail call noundef i32 @llvm.dx.resource.nonuniformindex(i32 %val)
+  store i32 %a, ptr %foo
+  %b = load i32, ptr %foo
+  %res4 = call target("dx.TypedBuffer", float, 1, 0, 0)
+            @llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 10, i32 %b, ptr @A.str)
+  ; CHECK: %[[RES4:.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 9, i32 0, i8 1 }, i32 %b, i1 false) #[[ATTR]]
+  ; CHECK: call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %[[RES4]], %dx.types.ResourceProperties { i32 4106, i32 265 }) #[[ATTR]]
+
+  ; NonUniformResourceIndex index value on a single resource (not an array): the flag is not going to get picked up
+  ; RWBuffer<float> B : register(u0, space20);
+  ;
+  ; B[NonUniformResourceIndex(val)];
+
+  %nuri3 = tail call noundef i32 @llvm.dx.resource.nonuniformindex(i32 %val)
+  %res5 = call target("dx.TypedBuffer", float, 1, 0, 0)
+            @llvm.dx.resource.handlefrombinding(i32 20, i32 0, i32 1, i32 %nuri3, ptr @B.str)
+  ; CHECK: %[[RES5:.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 0, i32 20, i8 1 }, i32 %val, i1 false) #[[ATTR]]
+  ; CHECK: call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %[[RES5]], %dx.types.ResourceProperties { i32 4106, i32 265 }) #[[ATTR]]
+
+  ; NonUniformResourceIndex on unrelated value - the call is removed:
+  ; foo = NonUniformResourceIndex(val);
+  %nuri4 = tail call noundef i32 @llvm.dx.resource.nonuniformindex(i32 %val)
+  store i32 %nuri4, ptr %foo
+  ; CHECK: store i32 %val, ptr %foo
+  ; CHECK-NOT: @llvm.dx.resource.nonuniformindex
+
+  ret void
+}
+
+; CHECK: attributes #[[ATTR]] = {{{.*}} memory(none) {{.*}}}
+
+attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) }