diff --git a/test/WaveOps/GroupMemoryBarrierWithGroupSync.test b/test/WaveOps/GroupMemoryBarrierWithGroupSync.test
new file mode 100644
index 000000000..fb0422f34
--- /dev/null
+++ b/test/WaveOps/GroupMemoryBarrierWithGroupSync.test
@@ -0,0 +1,92 @@
+#--- source.hlsl
+
+RWStructuredBuffer<uint4> Out : register(u0);
+
+groupshared uint4 SharedData;
+groupshared uint4 Indices[128]; // NOTE(review): never referenced below - confirm it is needed
+
+// Note: Placing GroupMemoryBarrierWithGroupSync in divergent control branches
+// is undefined, and hence, untested
+
+[numthreads(128,4,1)]
+void main(uint3 ThreadID : SV_GroupThreadID) {
+
+  // Basic Broadcast: thread (127,3) writes, the ThreadID.x == 0 threads read
+  if (ThreadID.x == 127 && ThreadID.y == 3) {
+    SharedData = 1;
+  }
+
+  // Prevents SharedData being read below before being initialized
+  GroupMemoryBarrierWithGroupSync();
+
+  if (ThreadID.x == 0) {
+    Out[0][ThreadID.y] = SharedData[ThreadID.y];
+  }
+
+  // Prevents SharedData being updated below before written to Out[0]
+  GroupMemoryBarrierWithGroupSync();
+
+  // Serialized Accumulation: one ThreadID.x value increments per iteration
+  for (uint I = 0; I < 128; I++) {
+    if (ThreadID.x == I) {
+      SharedData[ThreadID.y] = SharedData[ThreadID.y] + 1;
+    }
+
+    // Prevents SharedData datarace across ThreadID.x, and,
+    // SharedData being read into Out[1] before fully accumulated
+    GroupMemoryBarrierWithGroupSync();
+  }
+
+  if (ThreadID.x == 127) {
+    Out[1][ThreadID.y] = SharedData[ThreadID.y];
+  }
+}
+
+//--- pipeline.yaml
+---
+Shaders:
+  - Stage: Compute
+    Entry: main
+    DispatchSize: [1, 1, 1]
+Buffers:
+  - Name: Out
+    Format: UInt32
+    Channels: 4
+    ZeroInitSize: 32
+  - Name: ExpectedOut
+    Format: UInt32
+    Channels: 4
+    Data: [
+      1, 1, 1, 1,         # Broadcast
+      129, 129, 129, 129, # Accumulation: broadcast value 1 + 128 increments
+    ]
+Results:
+  - Result: ExpectedOut
+    Rule: BufferExact
+    Actual: Out
+    Expected: ExpectedOut
+DescriptorSets:
+  - Resources:
+    - Name: Out
+      Kind: RWStructuredBuffer
+      DirectXBinding:
+        Register: 0
+        Space: 0
+      VulkanBinding:
+        Binding: 0
+...
+#--- end + +# Bug: https://github.com/llvm/offload-test-suite/issues/444 +# XFAIL: Metal + +# Bug: https://github.com/llvm/offload-test-suite/issues/445 +# XFAIL: DirectX-Intel + +# The data-race is not observed on WARP +# Bug: https://github.com/llvm/llvm-project/issues/160208 +# XFAIL: Clang && !WARP + +# RUN: split-file %s %t +# RUN: %dxc_target -T cs_6_5 -Fo %t.o %t/source.hlsl +# RUN: %offloader %t/pipeline.yaml %t.o diff --git a/test/lit.cfg.py b/test/lit.cfg.py index f3621950d..27d5a067b 100644 --- a/test/lit.cfg.py +++ b/test/lit.cfg.py @@ -63,6 +63,7 @@ def setDeviceFeatures(config, device, compiler): if "Microsoft Basic Render Driver" in device["Description"]: config.available_features.add("%s-WARP" % API) config.available_features.add("WARP-%s" % config.warp_arch) + config.available_features.add("WARP") if "Intel" in device["Description"]: config.available_features.add("%s-Intel" % API) if "UHD Graphics" in device["Description"] and API == "DirectX":