diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td index 5b6e45de14994..a0f8d792b9c23 100644 --- a/clang/include/clang/Basic/Builtins.td +++ b/clang/include/clang/Basic/Builtins.td @@ -5661,6 +5661,30 @@ def HLSLClip: LangBuiltin<"HLSL_LANG"> { let Prototype = "void(...)"; } +def HLSLAllMemoryBarrier : LangBuiltin<"HLSL_LANG"> { + let Spellings = ["__builtin_hlsl_all_memory_barrier"]; + let Attributes = [NoThrow, Const]; + let Prototype = "void()"; +} + +def HLSLAllMemoryBarrierWithGroupSync: LangBuiltin<"HLSL_LANG"> { + let Spellings = ["__builtin_hlsl_all_memory_barrier_with_group_sync"]; + let Attributes = [NoThrow, Const]; + let Prototype = "void()"; +} + +def HLSLDeviceMemoryBarrier : LangBuiltin<"HLSL_LANG"> { + let Spellings = ["__builtin_hlsl_device_memory_barrier"]; + let Attributes = [NoThrow, Const]; + let Prototype = "void()"; +} + +def HLSLDeviceMemoryBarrierWithGroupSync: LangBuiltin<"HLSL_LANG"> { + let Spellings = ["__builtin_hlsl_device_memory_barrier_with_group_sync"]; + let Attributes = [NoThrow, Const]; + let Prototype = "void()"; +} + def HLSLGroupMemoryBarrier : LangBuiltin<"HLSL_LANG"> { let Spellings = ["__builtin_hlsl_group_memory_barrier"]; let Attributes = [NoThrow, Const]; diff --git a/clang/include/clang/Basic/HLSLIntrinsics.td b/clang/include/clang/Basic/HLSLIntrinsics.td index ff40f6b6f36f9..144b27cab7398 100644 --- a/clang/include/clang/Basic/HLSLIntrinsics.td +++ b/clang/include/clang/Basic/HLSLIntrinsics.td @@ -995,8 +995,60 @@ with regard to the x and y screen space coordinates. let VaryingMatDims = []; } -// Blocks execution of all threads in a group until all group shared accesses +// Blocks execution of all threads in a group until all memory accesses +// have been completed. +def hlsl_all_memory_barrier : + HLSLBuiltin<"AllMemoryBarrier", + "__builtin_hlsl_all_memory_barrier"> { + let Doc = [{ +\fn void AllMemoryBarrier(void) +\brief Blocks execution of all threads in a group until all memory +accesses have been completed. +}]; + let IsConvergent = 1; +} + +// Blocks execution of all threads in a group until all memory accesses +// have been completed and all threads in the group have reached this call. +def hlsl_all_memory_barrier_with_group_sync : + HLSLBuiltin<"AllMemoryBarrierWithGroupSync", + "__builtin_hlsl_all_memory_barrier_with_group_sync"> { + let Doc = [{ +\fn void AllMemoryBarrierWithGroupSync(void) +\brief Blocks execution of all threads in a group until all memory +accesses have been completed and all threads in the group have reached this call. +}]; + let IsConvergent = 1; +} + +// Blocks execution of all threads in a group until all device memory accesses +// have been completed. +def hlsl_device_memory_barrier : + HLSLBuiltin<"DeviceMemoryBarrier", + "__builtin_hlsl_device_memory_barrier"> { + let Doc = [{ +\fn void DeviceMemoryBarrier(void) +\brief Blocks execution of all threads in a group until all device memory +accesses have been completed. +}]; + let IsConvergent = 1; +} + +// Blocks execution of all threads in a group until all device memory accesses // have been completed and all threads in the group have reached this call. +def hlsl_device_memory_barrier_with_group_sync : + HLSLBuiltin<"DeviceMemoryBarrierWithGroupSync", + "__builtin_hlsl_device_memory_barrier_with_group_sync"> { + let Doc = [{ +\fn void DeviceMemoryBarrierWithGroupSync(void) +\brief Blocks execution of all threads in a group until all device memory +accesses have been completed and all threads in the group have reached this call. +}]; + let IsConvergent = 1; +} + +// Blocks execution of all threads in a group until all group shared accesses +// have been completed. def hlsl_group_memory_barrier : HLSLBuiltin<"GroupMemoryBarrier", "__builtin_hlsl_group_memory_barrier"> { diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp index f510195dbd6cb..b82a237ecefca 100644 --- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp +++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp @@ -1566,6 +1566,28 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID, assert(E->getArg(0)->getType()->hasFloatingRepresentation() && "clip operands types mismatch"); return handleHlslClip(E, this); + case Builtin::BI__builtin_hlsl_all_memory_barrier: { + Intrinsic::ID ID = CGM.getHLSLRuntime().getAllMemoryBarrierIntrinsic(); + return EmitRuntimeCall( + Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID)); + } + case Builtin::BI__builtin_hlsl_all_memory_barrier_with_group_sync: { + Intrinsic::ID ID = + CGM.getHLSLRuntime().getAllMemoryBarrierWithGroupSyncIntrinsic(); + return EmitRuntimeCall( + Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID)); + } + case Builtin::BI__builtin_hlsl_device_memory_barrier: { + Intrinsic::ID ID = CGM.getHLSLRuntime().getDeviceMemoryBarrierIntrinsic(); + return EmitRuntimeCall( + Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID)); + } + case Builtin::BI__builtin_hlsl_device_memory_barrier_with_group_sync: { + Intrinsic::ID ID = + CGM.getHLSLRuntime().getDeviceMemoryBarrierWithGroupSyncIntrinsic(); + return EmitRuntimeCall( + Intrinsic::getOrInsertDeclaration(&CGM.getModule(), ID)); + } case Builtin::BI__builtin_hlsl_group_memory_barrier: { Intrinsic::ID ID = CGM.getHLSLRuntime().getGroupMemoryBarrierIntrinsic(); return EmitRuntimeCall( diff --git a/clang/lib/CodeGen/CGHLSLRuntime.h b/clang/lib/CodeGen/CGHLSLRuntime.h index b1c5b3318a11e..21e7ddf394bbd 100644 --- a/clang/lib/CodeGen/CGHLSLRuntime.h +++ b/clang/lib/CodeGen/CGHLSLRuntime.h @@ -189,6 +189,12 @@ class CGHLSLRuntime { GENERATE_HLSL_INTRINSIC_FUNCTION(NonUniformResourceIndex, resource_nonuniformindex) GENERATE_HLSL_INTRINSIC_FUNCTION(BufferUpdateCounter, resource_updatecounter) + GENERATE_HLSL_INTRINSIC_FUNCTION(AllMemoryBarrier, all_memory_barrier) + GENERATE_HLSL_INTRINSIC_FUNCTION(AllMemoryBarrierWithGroupSync, + all_memory_barrier_with_group_sync) + GENERATE_HLSL_INTRINSIC_FUNCTION(DeviceMemoryBarrier, device_memory_barrier) + GENERATE_HLSL_INTRINSIC_FUNCTION(DeviceMemoryBarrierWithGroupSync, + device_memory_barrier_with_group_sync) GENERATE_HLSL_INTRINSIC_FUNCTION(GroupMemoryBarrier, group_memory_barrier) GENERATE_HLSL_INTRINSIC_FUNCTION(GroupMemoryBarrierWithGroupSync, group_memory_barrier_with_group_sync) diff --git a/clang/test/CodeGenHLSL/builtins/AllMemoryBarrier.hlsl b/clang/test/CodeGenHLSL/builtins/AllMemoryBarrier.hlsl new file mode 100644 index 0000000000000..90d51c716c771 --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/AllMemoryBarrier.hlsl @@ -0,0 +1,20 @@ +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ +// RUN: dxil-pc-shadermodel6.3-library %s \ +// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ +// RUN: -DTARGET=dx -check-prefixes=CHECK,CHECK-DXIL +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ +// RUN: spirv-unknown-vulkan-compute %s \ +// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ +// RUN: -DTARGET=spv -check-prefixes=CHECK,CHECK-SPIRV + +// CHECK-DXIL: define hidden void @ +// CHECK-SPIRV: define hidden spir_func void @ +void test_AllMemoryBarrier() { +// CHECK-DXIL: call void @llvm.[[TARGET]].all.memory.barrier() +// CHECK-SPIRV: call spir_func void @llvm.[[TARGET]].all.memory.barrier() + AllMemoryBarrier(); +} + +// CHECK: declare void @llvm.[[TARGET]].all.memory.barrier() #[[ATTRS:[0-9]+]] +// CHECK-NOT: attributes #[[ATTRS]] = {{.+}}memory(none){{.+}} +// CHECK: attributes #[[ATTRS]] = {{.+}}convergent{{.+}} diff --git a/clang/test/CodeGenHLSL/builtins/AllMemoryBarrierWithGroupSync.hlsl b/clang/test/CodeGenHLSL/builtins/AllMemoryBarrierWithGroupSync.hlsl new file mode 100644 index 0000000000000..6ddb69671e094 --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/AllMemoryBarrierWithGroupSync.hlsl @@ -0,0 +1,20 @@ +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ +// RUN: dxil-pc-shadermodel6.3-library %s \ +// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ +// RUN: -DTARGET=dx -check-prefixes=CHECK,CHECK-DXIL +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ +// RUN: spirv-unknown-vulkan-compute %s \ +// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ +// RUN: -DTARGET=spv -check-prefixes=CHECK,CHECK-SPIRV + +// CHECK-DXIL: define hidden void @ +// CHECK-SPIRV: define hidden spir_func void @ +void test_AllMemoryBarrierWithGroupSync() { +// CHECK-DXIL: call void @llvm.[[TARGET]].all.memory.barrier.with.group.sync() +// CHECK-SPIRV: call spir_func void @llvm.[[TARGET]].all.memory.barrier.with.group.sync() + AllMemoryBarrierWithGroupSync(); +} + +// CHECK: declare void @llvm.[[TARGET]].all.memory.barrier.with.group.sync() #[[ATTRS:[0-9]+]] +// CHECK-NOT: attributes #[[ATTRS]] = {{.+}}memory(none){{.+}} +// CHECK: attributes #[[ATTRS]] = {{.+}}convergent{{.+}} diff --git a/clang/test/CodeGenHLSL/builtins/DeviceMemoryBarrier.hlsl b/clang/test/CodeGenHLSL/builtins/DeviceMemoryBarrier.hlsl new file mode 100644 index 0000000000000..e2c08f7775c8c --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/DeviceMemoryBarrier.hlsl @@ -0,0 +1,20 @@ +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ +// RUN: dxil-pc-shadermodel6.3-library %s \ +// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ +// RUN: -DTARGET=dx -check-prefixes=CHECK,CHECK-DXIL +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ +// RUN: spirv-unknown-vulkan-compute %s \ +// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ +// RUN: -DTARGET=spv -check-prefixes=CHECK,CHECK-SPIRV + +// CHECK-DXIL: define hidden void @ +// CHECK-SPIRV: define hidden spir_func void @ +void test_DeviceMemoryBarrier() { +// CHECK-DXIL: call void @llvm.[[TARGET]].device.memory.barrier() +// CHECK-SPIRV: call spir_func void @llvm.[[TARGET]].device.memory.barrier() + DeviceMemoryBarrier(); +} + +// CHECK: declare void @llvm.[[TARGET]].device.memory.barrier() #[[ATTRS:[0-9]+]] +// CHECK-NOT: attributes #[[ATTRS]] = {{.+}}memory(none){{.+}} +// CHECK: attributes #[[ATTRS]] = {{.+}}convergent{{.+}} diff --git a/clang/test/CodeGenHLSL/builtins/DeviceMemoryBarrierWithGroupSync.hlsl b/clang/test/CodeGenHLSL/builtins/DeviceMemoryBarrierWithGroupSync.hlsl new file mode 100644 index 0000000000000..fa455f5f8338b --- /dev/null +++ b/clang/test/CodeGenHLSL/builtins/DeviceMemoryBarrierWithGroupSync.hlsl @@ -0,0 +1,20 @@ +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ +// RUN: dxil-pc-shadermodel6.3-library %s \ +// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ +// RUN: -DTARGET=dx -check-prefixes=CHECK,CHECK-DXIL +// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple \ +// RUN: spirv-unknown-vulkan-compute %s \ +// RUN: -emit-llvm -disable-llvm-passes -o - | FileCheck %s \ +// RUN: -DTARGET=spv -check-prefixes=CHECK,CHECK-SPIRV + +// CHECK-DXIL: define hidden void @ +// CHECK-SPIRV: define hidden spir_func void @ +void test_DeviceMemoryBarrierWithGroupSync() { +// CHECK-DXIL: call void @llvm.[[TARGET]].device.memory.barrier.with.group.sync() +// CHECK-SPIRV: call spir_func void @llvm.[[TARGET]].device.memory.barrier.with.group.sync() + DeviceMemoryBarrierWithGroupSync(); +} + +// CHECK: declare void @llvm.[[TARGET]].device.memory.barrier.with.group.sync() #[[ATTRS:[0-9]+]] +// CHECK-NOT: attributes #[[ATTRS]] = {{.+}}memory(none){{.+}} +// CHECK: attributes #[[ATTRS]] = {{.+}}convergent{{.+}} diff --git a/clang/test/SemaHLSL/BuiltIns/AllMemoryBarrier-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/AllMemoryBarrier-errors.hlsl new file mode 100644 index 0000000000000..63fd11a3095d8 --- /dev/null +++ b/clang/test/SemaHLSL/BuiltIns/AllMemoryBarrier-errors.hlsl @@ -0,0 +1,6 @@ +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -emit-llvm-only -disable-llvm-passes -verify + +void test_too_many_arg() { + __builtin_hlsl_all_memory_barrier(0); + // expected-error@-1 {{too many arguments to function call, expected 0, have 1}} +} diff --git a/clang/test/SemaHLSL/BuiltIns/AllMemoryBarrierWithGroupSync-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/AllMemoryBarrierWithGroupSync-errors.hlsl new file mode 100644 index 0000000000000..7a50be66ebb8c --- /dev/null +++ b/clang/test/SemaHLSL/BuiltIns/AllMemoryBarrierWithGroupSync-errors.hlsl @@ -0,0 +1,6 @@ +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -emit-llvm-only -disable-llvm-passes -verify + +void test_too_many_arg() { + __builtin_hlsl_all_memory_barrier_with_group_sync(0); + // expected-error@-1 {{too many arguments to function call, expected 0, have 1}} +} diff --git a/clang/test/SemaHLSL/BuiltIns/DeviceMemoryBarrier-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/DeviceMemoryBarrier-errors.hlsl new file mode 100644 index 0000000000000..16a57a36fc399 --- /dev/null +++ b/clang/test/SemaHLSL/BuiltIns/DeviceMemoryBarrier-errors.hlsl @@ -0,0 +1,6 @@ +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -emit-llvm-only -disable-llvm-passes -verify + +void test_too_many_arg() { + __builtin_hlsl_device_memory_barrier(0); + // expected-error@-1 {{too many arguments to function call, expected 0, have 1}} +} diff --git a/clang/test/SemaHLSL/BuiltIns/DeviceMemoryBarrierWithGroupSync-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/DeviceMemoryBarrierWithGroupSync-errors.hlsl new file mode 100644 index 0000000000000..ddbd7ba768009 --- /dev/null +++ b/clang/test/SemaHLSL/BuiltIns/DeviceMemoryBarrierWithGroupSync-errors.hlsl @@ -0,0 +1,6 @@ +// RUN: %clang_cc1 -finclude-default-header -triple dxil-pc-shadermodel6.6-library %s -emit-llvm-only -disable-llvm-passes -verify + +void test_too_many_arg() { + __builtin_hlsl_device_memory_barrier_with_group_sync(0); + // expected-error@-1 {{too many arguments to function call, expected 0, have 1}} +} diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td index 728bf47a17516..fecbff82e3638 100644 --- a/llvm/include/llvm/IR/IntrinsicsDirectX.td +++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td @@ -285,6 +285,18 @@ def int_dx_firstbituhigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, def int_dx_firstbitshigh : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>; def int_dx_firstbitlow : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_anyint_ty], [IntrNoMem]>; +def int_dx_all_memory_barrier + : DefaultAttrsIntrinsic<[], [], [IntrConvergent]>; + +def int_dx_all_memory_barrier_with_group_sync + : DefaultAttrsIntrinsic<[], [], [IntrConvergent]>; + +def int_dx_device_memory_barrier + : DefaultAttrsIntrinsic<[], [], [IntrConvergent]>; + +def int_dx_device_memory_barrier_with_group_sync + : DefaultAttrsIntrinsic<[], [], [IntrConvergent]>; + def int_dx_group_memory_barrier : DefaultAttrsIntrinsic<[], [], [IntrConvergent]>; diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td index d2a5fa1f08724..e0c64277ae342 100644 --- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td +++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td @@ -157,6 +157,10 @@ def int_spv_rsqrt : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty] def int_spv_quad_read_across_y : DefaultAttrsIntrinsic<[llvm_any_ty], [LLVMMatchType<0>], [IntrConvergent, IntrNoMem]>; def int_spv_sign : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i32_ty>], [llvm_any_ty], [IntrNoMem]>; def int_spv_radians : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty], [IntrNoMem]>; + def int_spv_all_memory_barrier : DefaultAttrsIntrinsic<[], [], [IntrConvergent]>; + def int_spv_all_memory_barrier_with_group_sync : DefaultAttrsIntrinsic<[], [], [IntrConvergent]>; + def int_spv_device_memory_barrier : DefaultAttrsIntrinsic<[], [], [IntrConvergent]>; + def int_spv_device_memory_barrier_with_group_sync : DefaultAttrsIntrinsic<[], [], [IntrConvergent]>; def int_spv_group_memory_barrier : DefaultAttrsIntrinsic<[], [], [IntrConvergent]>; def int_spv_group_memory_barrier_with_group_sync : ClangBuiltin<"__builtin_spirv_group_barrier">, DefaultAttrsIntrinsic<[], [], [IntrConvergent]>; diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td index 0a1e0114aa3bb..41f09f9db1017 100644 --- a/llvm/lib/Target/DirectX/DXIL.td +++ b/llvm/lib/Target/DirectX/DXIL.td @@ -931,6 +931,14 @@ def GetDimensions : DXILOp<72, getDimensions> { def Barrier : DXILOp<80, barrier> { let Doc = "inserts a memory barrier in the shader"; let intrinsics = [ + IntrinSelect]>, + IntrinSelect]>, + IntrinSelect]>, + IntrinSelect]>, IntrinSelect]>, IntrinSelect