Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsDirectX.td
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ def int_dx_resource_handlefromimplicitbinding
def int_dx_resource_getpointer
: DefaultAttrsIntrinsic<[llvm_anyptr_ty], [llvm_any_ty, llvm_i32_ty],
[IntrNoMem]>;

def int_dx_resource_nonuniformindex
: DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;

def int_dx_resource_load_typedbuffer
: DefaultAttrsIntrinsic<[llvm_any_ty, llvm_i1_ty],
[llvm_any_ty, llvm_i32_ty], [IntrReadMem]>;
Expand Down
3 changes: 3 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsSPIRV.td
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,9 @@ def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty]
: DefaultAttrsIntrinsic<[llvm_anyptr_ty], [llvm_any_ty, llvm_i32_ty],
[IntrNoMem]>;

def int_spv_resource_nonuniformindex
: DefaultAttrsIntrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;

// Read a value from the image buffer. It does not translate directly to a
// single OpImageRead because the result type is not necessarily a 4 element
// vector.
Expand Down
68 changes: 58 additions & 10 deletions llvm/lib/Target/DirectX/DXILOpLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "llvm/Analysis/DXILMetadataAnalysis.h"
#include "llvm/Analysis/DXILResource.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
Expand All @@ -24,6 +25,7 @@
#include "llvm/IR/IntrinsicsDirectX.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Use.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/ErrorHandling.h"
Expand All @@ -42,6 +44,7 @@ class OpLowerer {
DXILResourceTypeMap &DRTM;
const ModuleMetadataInfo &MMDI;
SmallVector<CallInst *> CleanupCasts;
Function *CleanupNURI = nullptr;

public:
OpLowerer(Module &M, DXILResourceMap &DRM, DXILResourceTypeMap &DRTM,
Expand Down Expand Up @@ -195,6 +198,21 @@ class OpLowerer {
CleanupCasts.clear();
}

void cleanupNonUniformResourceIndexCalls() {
// Replace all NonUniformResourceIndex calls with their argument.
if (!CleanupNURI)
return;
for (User *U : make_early_inc_range(CleanupNURI->users())) {
CallInst *CI = dyn_cast<CallInst>(U);
if (!CI)
continue;
CI->replaceAllUsesWith(CI->getArgOperand(0));
CI->eraseFromParent();
}
CleanupNURI->eraseFromParent();
CleanupNURI = nullptr;
}

// Remove the resource global associated with the handleFromBinding call
// instruction and their uses as they aren't needed anymore.
// TODO: We should verify that all the globals get removed.
Expand Down Expand Up @@ -229,6 +247,31 @@ class OpLowerer {
NameGlobal->removeFromParent();
}

bool hasNonUniformIndex(Value *IndexOp) {
if (isa<llvm::Constant>(IndexOp))
return false;

SmallVector<Value *> WorkList;
WorkList.push_back(IndexOp);

while (!WorkList.empty()) {
Value *V = WorkList.pop_back_val();
if (auto *CI = dyn_cast<CallInst>(V)) {
if (CI->getCalledFunction()->getIntrinsicID() ==
Intrinsic::dx_resource_nonuniformindex)
return true;
}
if (auto *U = llvm::dyn_cast<llvm::User>(V)) {
for (llvm::Value *Op : U->operands()) {
if (isa<llvm::Constant>(Op))
continue;
WorkList.push_back(Op);
}
}
}
return false;
}

[[nodiscard]] bool lowerToCreateHandle(Function &F) {
IRBuilder<> &IRB = OpBuilder.getIRB();
Type *Int8Ty = IRB.getInt8Ty();
Expand All @@ -250,13 +293,12 @@ class OpLowerer {
IndexOp = IRB.CreateAdd(IndexOp,
ConstantInt::get(Int32Ty, Binding.LowerBound));

// FIXME: The last argument is a NonUniform flag which needs to be set
// based on resource analysis.
// https://github.com/llvm/llvm-project/issues/155701
bool HasNonUniformIndex =
(Binding.Size == 1) ? false : hasNonUniformIndex(IndexOp);
std::array<Value *, 4> Args{
ConstantInt::get(Int8Ty, llvm::to_underlying(RC)),
ConstantInt::get(Int32Ty, Binding.RecordID), IndexOp,
ConstantInt::get(Int1Ty, false)};
ConstantInt::get(Int1Ty, HasNonUniformIndex)};
Expected<CallInst *> OpCall =
OpBuilder.tryCreateOp(OpCode::CreateHandle, Args, CI->getName());
if (Error E = OpCall.takeError())
Expand Down Expand Up @@ -300,11 +342,10 @@ class OpLowerer {
: Binding.LowerBound + Binding.Size - 1;
Constant *ResBind = OpBuilder.getResBind(Binding.LowerBound, UpperBound,
Binding.Space, RC);
// FIXME: The last argument is a NonUniform flag which needs to be set
// based on resource analysis.
// https://github.com/llvm/llvm-project/issues/155701
Constant *NonUniform = ConstantInt::get(Int1Ty, false);
std::array<Value *, 3> BindArgs{ResBind, IndexOp, NonUniform};
bool NonUniformIndex =
(Binding.Size == 1) ? false : hasNonUniformIndex(IndexOp);
Constant *NonUniformOp = ConstantInt::get(Int1Ty, NonUniformIndex);
std::array<Value *, 3> BindArgs{ResBind, IndexOp, NonUniformOp};
Expected<CallInst *> OpBind = OpBuilder.tryCreateOp(
OpCode::CreateHandleFromBinding, BindArgs, CI->getName());
if (Error E = OpBind.takeError())
Expand Down Expand Up @@ -868,6 +909,11 @@ class OpLowerer {
case Intrinsic::dx_resource_getpointer:
HasErrors |= lowerGetPointer(F);
break;
case Intrinsic::dx_resource_nonuniformindex:
assert(!CleanupNURI &&
"overloaded llvm.dx.resource.nonuniformindex intrinsics?");
CleanupNURI = &F;
break;
case Intrinsic::dx_resource_load_typedbuffer:
HasErrors |= lowerTypedBufferLoad(F, /*HasCheckBit=*/true);
break;
Expand Down Expand Up @@ -908,8 +954,10 @@ class OpLowerer {
}
Updated = true;
}
if (Updated && !HasErrors)
if (Updated && !HasErrors) {
cleanupHandleCasts();
cleanupNonUniformResourceIndexCalls();
}

return Updated;
}
Expand Down
70 changes: 70 additions & 0 deletions llvm/test/CodeGen/DirectX/CreateHandle-NURI.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
; RUN: opt -S -passes=dxil-op-lower %s | FileCheck %s

target triple = "dxil-pc-shadermodel6.0-compute"

@A.str = internal unnamed_addr constant [2 x i8] c"A\00", align 1
@B.str = internal unnamed_addr constant [2 x i8] c"A\00", align 1

declare i32 @some_val();

define void @test_buffers_with_nuri() {

%val = call i32 @some_val()
%foo = alloca i32, align 4

; RWBuffer<float> A[10];
;
; A[NonUniformResourceIndex(val)];

%nuri1 = tail call noundef i32 @llvm.dx.resource.nonuniformindex(i32 %val)
%res1 = call target("dx.TypedBuffer", float, 1, 0, 0)
@llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 10, i32 %nuri1, ptr @A.str)
; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 %val, i1 true) #[[ATTR:.*]]
; CHECK-NOT: @llvm.dx.cast.handle
; CHECK-NOT: @llvm.dx.resource.nonuniformindex

; A[NonUniformResourceIndex(val + 1) % 10];
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is interesting. I feel like DXC doesn't really support this kind of case well.

%add1 = add i32 %val, 1
%nuri2 = tail call noundef i32 @llvm.dx.resource.nonuniformindex(i32 %add1)
%rem1 = urem i32 %nuri2, 10
%res2 = call target("dx.TypedBuffer", float, 1, 0, 0)
@llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 10, i32 %rem1, ptr @A.str)
; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 %rem1, i1 true) #[[ATTR]]

; A[10 + 3 * NonUniformResourceIndex(GI)];
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about something like:

int tmp = NonUniformResourceIndex(GI);
A[tmp];

Copy link
Member Author

@hekota hekota Sep 19, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This works as long as the A[tmp] is optimized into A[NonUniformResourceIndex(GI)], which is very likely since tmp is an int. But if it goes through store & load, the lowerer will not pick it up. Is that ok? I did consider limiting the NonUniformResourceIndex lookup to just single check directly on the index (i.e. the call would have to always be directly on the index value), to make it deterministic.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's difficult to say where to draw the line here. DXC requires it to be on the index, but it's possible that's applied after SSA (so store & load chain may be collapsed).

FXC was a bit more flexible, leading to code which would work for FXC, but not for DXC. However, there wasn't a hard rule you could follow to guarantee it would work still.

If we have the rule that it must be on the index used directly in the operation, which is what we've said for DXC, it's easiest to make sure that works. Some HLSL authors have complained a bit about this strictness though.

If we could find another objective rule we could apply and guarantee works which gives more flexibility, we could use that, but we might never implement that for DXC.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I suspect this handling is already more robust than DXC, so we can probably just call it good.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we can, we should consider emitting some error, or at least a warning, if a NURI call doesn't lead to marking a resource indexing create handle as NURI.

Copy link
Member Author

@hekota hekota Sep 22, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since the call can be safely ignored when it is not on a resource index and there are no side effects, I don't think adding a special check just to emit a warning is useful.

%mul1 = mul i32 %nuri1, 3
%add2 = add i32 %mul1, 10
%res3 = call target("dx.TypedBuffer", float, 1, 0, 0)
@llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 10, i32 %add2, ptr @A.str)
; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 %add2, i1 true) #[[ATTR]]

; NonUniformResourceIndex value going through store & load - the flag is not going to get picked up:
%a = tail call noundef i32 @llvm.dx.resource.nonuniformindex(i32 %val)
store i32 %a, ptr %foo
%b = load i32, ptr %foo
%res4 = call target("dx.TypedBuffer", float, 1, 0, 0)
@llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 10, i32 %b, ptr @A.str)
; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 0, i32 %b, i1 false) #[[ATTR]]

; NonUniformResourceIndex index value on a single resouce (not an array) - the flag is not going to get picked up:
;
; RWBuffer<float> B : register(u20);
; B[NonUniformResourceIndex(val)];
%nuri3 = tail call noundef i32 @llvm.dx.resource.nonuniformindex(i32 %val)
%res5 = call target("dx.TypedBuffer", float, 1, 0, 0)
@llvm.dx.resource.handlefrombinding(i32 20, i32 0, i32 1, i32 %nuri1, ptr @B.str)
; CHECK: call %dx.types.Handle @dx.op.createHandle(i32 57, i8 1, i32 1, i32 %val, i1 false) #[[ATTR]]

; NonUniformResourceIndex on unrelated value - the call is removed:
; foo = NonUniformResourceIndex(val);
%nuri4 = tail call noundef i32 @llvm.dx.resource.nonuniformindex(i32 %val)
store i32 %nuri4, ptr %foo
; CHECK: store i32 %val, ptr %foo
; CHECK-NOT: @llvm.dx.resource.nonuniformindex

ret void
}

; CHECK: attributes #[[ATTR]] = {{{.*}} memory(read) {{.*}}}

attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) }
77 changes: 77 additions & 0 deletions llvm/test/CodeGen/DirectX/CreateHandleFromBinding-NURI.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
; RUN: opt -S -passes=dxil-op-lower %s | FileCheck %s

target triple = "dxil-pc-shadermodel6.6-compute"

@A.str = internal unnamed_addr constant [2 x i8] c"A\00", align 1
@B.str = internal unnamed_addr constant [2 x i8] c"A\00", align 1

declare i32 @some_val();

define void @test_buffers_with_nuri() {

%val = call i32 @some_val()
%foo = alloca i32, align 4

; RWBuffer<float> A[10];
;
; A[NonUniformResourceIndex(val)];

%nuri1 = tail call noundef i32 @llvm.dx.resource.nonuniformindex(i32 %val)
%res1 = call target("dx.TypedBuffer", float, 1, 0, 0)
@llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 10, i32 %nuri1, ptr @A.str)
; CHECK: %[[RES1:.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 9, i32 0, i8 1 }, i32 %val, i1 true) #[[ATTR:.*]]
; CHECK: call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %[[RES1]], %dx.types.ResourceProperties { i32 4106, i32 265 }) #[[ATTR]]
; CHECK-NOT: @llvm.dx.cast.handle
; CHECK-NOT: @llvm.dx.resource.nonuniformindex

; A[NonUniformResourceIndex(val + 1) % 10];
%add1 = add i32 %val, 1
%nuri2 = tail call noundef i32 @llvm.dx.resource.nonuniformindex(i32 %add1)
%rem1 = urem i32 %nuri2, 10
%res2 = call target("dx.TypedBuffer", float, 1, 0, 0)
@llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 10, i32 %rem1, ptr @A.str)
; CHECK: %[[RES2:.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 9, i32 0, i8 1 }, i32 %rem1, i1 true) #[[ATTR]]
; CHECK: call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %[[RES2]], %dx.types.ResourceProperties { i32 4106, i32 265 }) #[[ATTR]]

; A[10 + 3 * NonUniformResourceIndex(GI)];
%mul1 = mul i32 %nuri1, 3
%add2 = add i32 %mul1, 10
%res3 = call target("dx.TypedBuffer", float, 1, 0, 0)
@llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 10, i32 %add2, ptr @A.str)
; CHECK: %[[RES3:.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 9, i32 0, i8 1 }, i32 %add2, i1 true) #[[ATTR]]
; CHECK: %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %[[RES3]], %dx.types.ResourceProperties { i32 4106, i32 265 }) #[[ATTR]]
ret void

; NonUniformResourceIndex value going through store & load: the flag is not going to get picked up
%a = tail call noundef i32 @llvm.dx.resource.nonuniformindex(i32 %val)
store i32 %a, ptr %foo
%b = load i32, ptr %foo
%res4 = call target("dx.TypedBuffer", float, 1, 0, 0)
@llvm.dx.resource.handlefrombinding(i32 0, i32 0, i32 10, i32 %b, ptr @A.str)
; CHECK: %[[RES4:.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 9, i32 0, i8 1 }, i32 %b, i1 false) #[[ATTR]]
; CHECK: %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %[[RES4]], %dx.types.ResourceProperties { i32 4106, i32 265 }) #[[ATTR]]

; NonUniformResourceIndex index value on a single resouce (not an array): the flag is not going to get picked up
; RWBuffer<float> B : register(u20);
;
; B[NonUniformResourceIndex(val)];

%nuri3 = tail call noundef i32 @llvm.dx.resource.nonuniformindex(i32 %val)
%res5 = call target("dx.TypedBuffer", float, 1, 0, 0)
@llvm.dx.resource.handlefrombinding(i32 20, i32 0, i32 1, i32 %nuri1, ptr @B.str)
; CHECK: %[[RES4:.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 0, i32 20, i8 1 }, i32 %val, i1 false) #[[ATTR]]
; CHECK: %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %[[RES4]], %dx.types.ResourceProperties { i32 4106, i32 265 }) #[[ATTR]]

; NonUniformResourceIndex on unrelated value - the call is removed:
; foo = NonUniformResourceIndex(val);
%nuri4 = tail call noundef i32 @llvm.dx.resource.nonuniformindex(i32 %val)
store i32 %nuri4, ptr %foo
; CHECK: store i32 %val, ptr %foo
; CHECK-NOT: @llvm.dx.resource.nonuniformindex

ret void
}

; CHECK: attributes #[[ATTR]] = {{{.*}} memory(none) {{.*}}}

attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) }
Loading