diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp
index b786fee9fc..977e0061fe 100644
--- a/lib/DXIL/DxilOperations.cpp
+++ b/lib/DXIL/DxilOperations.cpp
@@ -1510,7 +1510,7 @@ static const OP::OpCodeProperty CoreOps_OpCodeProps[] = {
      "SetMeshOutputCounts",
      OCC::SetMeshOutputCounts,
      "setMeshOutputCounts",
-     Attribute::None,
+     Attribute::NoDuplicate,
      0,
      {},
      {}}, // Overloads: v
@@ -1552,7 +1552,7 @@ static const OP::OpCodeProperty CoreOps_OpCodeProps[] = {
      "DispatchMesh",
      OCC::DispatchMesh,
      "dispatchMesh",
-     Attribute::None,
+     Attribute::NoDuplicate,
      1,
      {{0x100}},
      {{0x0}}}, // Overloads: u
diff --git a/lib/HLSL/HLOperations.cpp b/lib/HLSL/HLOperations.cpp
index 2cb3c489e8..23a6632212 100644
--- a/lib/HLSL/HLOperations.cpp
+++ b/lib/HLSL/HLOperations.cpp
@@ -531,6 +531,8 @@ static AttributeSet GetHLFunctionAttributes(LLVMContext &C,
     case IntrinsicOp::IOP_GroupMemoryBarrier:
     case IntrinsicOp::IOP_AllMemoryBarrierWithGroupSync:
     case IntrinsicOp::IOP_AllMemoryBarrier:
+    case IntrinsicOp::IOP_SetMeshOutputCounts:
+    case IntrinsicOp::IOP_DispatchMesh:
       addAttr(Attribute::NoDuplicate);
       break;
     }
diff --git a/tools/clang/test/CodeGenHLSL/mesh-val/sinkSetMeshOutputCounts.hlsl b/tools/clang/test/CodeGenHLSL/mesh-val/sinkSetMeshOutputCounts.hlsl
new file mode 100644
index 0000000000..1c5121f0fc
--- /dev/null
+++ b/tools/clang/test/CodeGenHLSL/mesh-val/sinkSetMeshOutputCounts.hlsl
@@ -0,0 +1,53 @@
+// RUN: %dxc -E main -T ms_6_5 %s | FileCheck %s
+
+// Test that SetMeshOutputCounts has noduplicate attribute, preventing the
+// optimizer from sinking/duplicating the call into branches that compute
+// the count values.
+
+// Exactly one call must survive: no second call instruction may appear
+// between the first call and the first noduplicate match that follows it.
+// CHECK: call void @dx.op.setMeshOutputCounts
+// CHECK-NOT: call void @dx.op.setMeshOutputCounts
+// CHECK: noduplicate
+
+#define MAX_VERT 32
+#define MAX_PRIM 16
+#define NUM_THREADS 32
+
+struct MeshPerVertex {
+  float4 position : SV_Position;
+};
+
+struct MeshPayload {
+  float normal;
+};
+
+[numthreads(NUM_THREADS, 1, 1)]
+[outputtopology("triangle")]
+void main(
+    out indices uint3 primIndices[MAX_PRIM],
+    out vertices MeshPerVertex verts[MAX_VERT],
+    in payload MeshPayload mpl,
+    in uint tig : SV_GroupIndex,
+    in uint vid : SV_ViewID
+  )
+{
+  // Compute counts in a branch - optimizer used to sink SetMeshOutputCounts
+  // into each branch, producing two copies and failing validation.
+  uint nverts, nprims;
+  if (vid % 2) {
+    nverts = MAX_VERT;
+    nprims = MAX_PRIM;
+  } else {
+    nverts = MAX_VERT / 2;
+    nprims = MAX_PRIM / 2;
+  }
+  SetMeshOutputCounts(nverts, nprims);
+
+  if (tig < nverts) {
+    verts[tig].position = float4(0, 0, 0, 1);
+  }
+  if (tig < nprims) {
+    primIndices[tig] = uint3(tig * 3, tig * 3 + 1, tig * 3 + 2);
+  }
+}
diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py
index 634b62cc6f..d3679bfbd1 100644
--- a/utils/hct/hctdb.py
+++ b/utils/hct/hctdb.py
@@ -4156,7 +4156,7 @@ def UFI(name, **mappings):
             "SetMeshOutputCounts",
             "Mesh shader intrinsic SetMeshOutputCounts",
             "v",
-            "",
+            "nd",
             [
                 retvoid_param,
                 db_dxil_param(2, "i32", "numVertices", "number of output vertices"),
@@ -4234,7 +4234,7 @@ def UFI(name, **mappings):
             "DispatchMesh",
             "Amplification shader intrinsic DispatchMesh",
             "u",
-            "",
+            "nd",
             [
                 retvoid_param,
                 db_dxil_param(2, "i32", "threadGroupCountX", "thread group count x"),