diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 35d2c3e19fdf9..9bc70ea5e5858 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -4945,6 +4945,12 @@ def HLSLResourceHandleFromImplicitBinding : LangBuiltin<"HLSL_LANG"> { let Prototype = "void(...)"; } +def HLSLResourceNonUniformIndex : LangBuiltin<"HLSL_LANG"> { + let Spellings = ["__builtin_hlsl_resource_nonuniformindex"]; + let Attributes = [NoThrow]; + let Prototype = "uint32_t(uint32_t)"; +} + def HLSLAll : LangBuiltin<"HLSL_LANG"> { let Spellings = ["__builtin_hlsl_all"]; let Attributes = [NoThrow, Const]; diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp index 7b5b924b1fe82..9f87afa5a8a3d 100644 --- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp +++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp @@ -352,6 +352,13 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, SmallVector Args{OrderID, SpaceOp, RangeOp, IndexOp, Name}; return Builder.CreateIntrinsic(HandleTy, IntrinsicID, Args); } + case Builtin::BI__builtin_hlsl_resource_nonuniformindex: { + Value *IndexOp = EmitScalarExpr(E->getArg(0)); + llvm::Type *RetTy = ConvertType(E->getType()); + return Builder.CreateIntrinsic( + RetTy, CGM.getHLSLRuntime().getNonUniformResourceIndexIntrinsic(), + ArrayRef{IndexOp}); + } case Builtin::BI__builtin_hlsl_all: { Value *Op0 = EmitScalarExpr(E->getArg(0)); return Builder.CreateIntrinsic( diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h index 370f3d5c5d30d..f4b410664d60c 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.h +++ b/clang/lib/CodeGen/CGHLSLRuntime.h @@ -129,6 +129,8 @@ class CGHLSLRuntime { resource_handlefrombinding) GENERATE_HLSL_INTRINSIC_FUNCTION(CreateHandleFromImplicitBinding, resource_handlefromimplicitbinding) + GENERATE_HLSL_INTRINSIC_FUNCTION(NonUniformResourceIndex, + resource_nonuniformindex) 
GENERATE_HLSL_INTRINSIC_FUNCTION(BufferUpdateCounter, resource_updatecounter) GENERATE_HLSL_INTRINSIC_FUNCTION(GroupMemoryBarrierWithGroupSync, group_memory_barrier_with_group_sync) diff --git a/clang/lib/Headers/hlsl/hlsl_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_intrinsics.h index d9d87c827e6a4..5ba5bfb9abde0 100644 --- a/clang/lib/Headers/hlsl/hlsl_intrinsics.h +++ b/clang/lib/Headers/hlsl/hlsl_intrinsics.h @@ -422,6 +422,30 @@ constexpr int4 D3DCOLORtoUBYTE4(float4 V) { return __detail::d3d_color_to_ubyte4_impl(V); } +//===----------------------------------------------------------------------===// +// NonUniformResourceIndex builtin +//===----------------------------------------------------------------------===// + +/// \fn uint NonUniformResourceIndex(uint Index) +/// \brief A compiler hint to indicate that a resource index varies across +/// threads within a wave (i.e., it is non-uniform). +/// \param Index [in] Resource array index +/// +/// The return value is the \p Index parameter. +/// +/// When indexing into an array of shader resources (e.g., textures, buffers), +/// some GPU hardware and drivers require the compiler to know whether the index +/// is uniform (same for all threads) or non-uniform (varies per thread). +/// +/// Using NonUniformResourceIndex explicitly marks an index as non-uniform, +/// disabling certain assumptions or optimizations that could lead to incorrect +/// behavior when dynamically accessing resource arrays with non-uniform +/// indices. 
+ +constexpr uint32_t NonUniformResourceIndex(uint32_t Index) { + return __builtin_hlsl_resource_nonuniformindex(Index); +} + //===----------------------------------------------------------------------===// // reflect builtin //===----------------------------------------------------------------------===// diff --git a/clang/test/CodeGenHLSL/resources/NonUniformResourceIndex.hlsl b/clang/test/CodeGenHLSL/resources/NonUniformResourceIndex.hlsl new file mode 100644 index 0000000000000..ab512ce111d19 --- /dev/null +++ b/clang/test/CodeGenHLSL/resources/NonUniformResourceIndex.hlsl @@ -0,0 +1,38 @@ +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.3-compute -emit-llvm -disable-llvm-passes -o - %s \ +// RUN: | llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK,DXIL +// RUN: %clang_cc1 -finclude-default-header -triple spirv-pc-vulkan1.3-compute -emit-llvm -disable-llvm-passes -o - %s \ +// RUN: | llvm-cxxfilt | FileCheck %s --check-prefixes=CHECK,SPV + +RWBuffer A[10]; + +[numthreads(4,1,1)] +void main(uint GI : SV_GroupID) { + // CHECK: %[[GI:.*]] = load i32, ptr %GI.addr + // CHECK: %[[NURI_1:.*]] = call {{.*}} i32 @hlsl::NonUniformResourceIndex(unsigned int)(i32 noundef %[[GI]]) + // CHECK: call void @hlsl::RWBuffer::__createFromImplicitBinding(unsigned int, unsigned int, int, unsigned int, char const*) + // CHECK-SAME: (ptr {{.*}}, i32 noundef 0, i32 noundef 0, i32 noundef 10, i32 noundef %[[NURI_1]], ptr noundef @A.str) + float a = A[NonUniformResourceIndex(GI)][0]; + + // CHECK: %[[GI:.*]] = load i32, ptr %GI.addr + // CHECK: %[[ADD:.*]] = add i32 %[[GI]], 1 + // CHECK: %[[NURI_2:.*]] = call {{.*}} i32 @hlsl::NonUniformResourceIndex(unsigned int)(i32 noundef %[[ADD]]) + // CHECK: %[[MOD:.*]] = urem i32 %[[NURI_2]], 10 + // CHECK: call void @hlsl::RWBuffer::__createFromImplicitBinding(unsigned int, unsigned int, int, unsigned int, char const*) + // CHECK-SAME: (ptr {{.*}}, i32 noundef 0, i32 noundef 0, i32 noundef 10, i32 noundef %[[MOD]], ptr 
noundef @A.str) + float b = A[NonUniformResourceIndex(GI + 1) % 10][0]; + + // CHECK: %[[GI:.*]] = load i32, ptr %GI.addr + // CHECK: %[[NURI_3:.*]] = call {{.*}} i32 @hlsl::NonUniformResourceIndex(unsigned int)(i32 noundef %[[GI]]) + // CHECK: %[[MUL:.*]] = mul i32 3, %[[NURI_3]] + // CHECK: %[[ADD2:.*]] = add i32 10, %[[MUL]] + // CHECK: call void @hlsl::RWBuffer::__createFromImplicitBinding(unsigned int, unsigned int, int, unsigned int, char const*) + // CHECK-SAME: (ptr {{.*}}, i32 noundef 0, i32 noundef 0, i32 noundef 10, i32 noundef %[[ADD2]], ptr noundef @A.str) + float c = A[10 + 3 * NonUniformResourceIndex(GI)][0]; + A[0][0] = a + b + c; +} + +// CHECK: define {{.*}} i32 @hlsl::NonUniformResourceIndex(unsigned int)(i32 noundef %Index) +// CHECK: %[[INDEX1:.*]] = load i32, ptr %Index.addr, align 4 +// DXIL: %[[INDEX2:.*]] = call i32 @llvm.dx.resource.nonuniformindex(i32 %[[INDEX1]]) +// SPV: %[[INDEX2:.*]] = call i32 @llvm.spv.resource.nonuniformindex(i32 %[[INDEX1]]) +// CHECK: ret i32 %[[INDEX2]]