diff --git a/llvm/docs/SPIRVUsage.rst b/llvm/docs/SPIRVUsage.rst index 6ff8034cac00c..0b401357223ec 100644 --- a/llvm/docs/SPIRVUsage.rst +++ b/llvm/docs/SPIRVUsage.rst @@ -213,6 +213,8 @@ list of supported SPIR-V extensions, sorted alphabetically by their extension na - Adds a bitwise instruction on three operands and a look-up table index for specifying the bitwise operation to perform. * - ``SPV_INTEL_subgroup_matrix_multiply_accumulate`` - Adds an instruction to compute the matrix product of an M x K matrix with a K x N matrix and then add an M x N matrix. + * - ``SPV_INTEL_2d_block_io`` + - Adds additional subgroup block load and store instructions to read two-dimensional blocks of data from a two-dimensional region of memory, or to write two-dimensional blocks of data to a two-dimensional region of memory. To enable multiple extensions, list them separated by comma. For example, to enable support for atomic operations on floating-point numbers and arbitrary precision integers, use: diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp index c516be0297e66..a2ba5a63f9770 100644 --- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp @@ -697,7 +697,8 @@ static bool buildAtomicStoreInst(const SPIRV::IncomingCall *Call, MachineIRBuilder &MIRBuilder, SPIRVGlobalRegistry *GR) { if (Call->isSpirvOp()) - return buildOpFromWrapper(MIRBuilder, SPIRV::OpAtomicStore, Call, Register(0)); + return buildOpFromWrapper(MIRBuilder, SPIRV::OpAtomicStore, Call, + Register(0)); Register ScopeRegister = buildConstantIntReg32(SPIRV::Scope::Device, MIRBuilder, GR); @@ -2296,6 +2297,15 @@ static bool generateExtendedBitOpsInst(const SPIRV::IncomingCall *Call, return buildExtendedBitOpsInst(Call, Opcode, MIRBuilder, GR); } +static bool generateSubgroup2DBlockInst(const SPIRV::IncomingCall *Call, + MachineIRBuilder &MIRBuilder, + SPIRVGlobalRegistry *GR) { + const SPIRV::DemangledBuiltin *Builtin = 
Call->Builtin; + unsigned Opcode = + SPIRV::lookupNativeBuiltin(Builtin->Name, Builtin->Set)->Opcode; + return buildOpFromWrapper(MIRBuilder, Opcode, Call, Register(0)); +} + static bool generateBindlessImageINTELInst(const SPIRV::IncomingCall *Call, MachineIRBuilder &MIRBuilder, SPIRVGlobalRegistry *GR) { @@ -2902,6 +2912,8 @@ std::optional lowerBuiltin(const StringRef DemangledCall, return generateBindlessImageINTELInst(Call.get(), MIRBuilder, GR); case SPIRV::TernaryBitwiseINTEL: return generateTernaryBitwiseFunctionINTELInst(Call.get(), MIRBuilder, GR); + case SPIRV::Subgroup2DBlock: + return generateSubgroup2DBlockInst(Call.get(), MIRBuilder, GR); } return false; } diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td index 59cd38126cc01..209ba40f827dd 100644 --- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td +++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td @@ -68,6 +68,7 @@ def ICarryBorrow : BuiltinGroup; def ExtendedBitOps : BuiltinGroup; def BindlessINTEL : BuiltinGroup; def TernaryBitwiseINTEL : BuiltinGroup; +def Subgroup2DBlock : BuiltinGroup; //===----------------------------------------------------------------------===// // Class defining a demangled builtin record. 
The information in the record @@ -718,6 +719,13 @@ defm : DemangledNativeBuiltin<"__spirv_ConvertHandleToSampledImageINTEL", OpenCL // SPV_INTEL_ternary_bitwise_function builtin records: defm : DemangledNativeBuiltin<"__spirv_BitwiseFunctionINTEL", OpenCL_std, TernaryBitwiseINTEL, 4, 4, OpBitwiseFunctionINTEL>; +// SPV_INTEL_2d_block_io builtin records: +defm : DemangledNativeBuiltin<"__spirv_Subgroup2DBlockLoadINTEL", OpenCL_std, Subgroup2DBlock, 10, 10, OpSubgroup2DBlockLoadINTEL>; +defm : DemangledNativeBuiltin<"__spirv_Subgroup2DBlockLoadTransposeINTEL", OpenCL_std, Subgroup2DBlock, 10, 10, OpSubgroup2DBlockLoadTransposeINTEL>; +defm : DemangledNativeBuiltin<"__spirv_Subgroup2DBlockLoadTransformINTEL", OpenCL_std, Subgroup2DBlock, 10, 10, OpSubgroup2DBlockLoadTransformINTEL>; +defm : DemangledNativeBuiltin<"__spirv_Subgroup2DBlockPrefetchINTEL", OpenCL_std, Subgroup2DBlock, 9, 9, OpSubgroup2DBlockPrefetchINTEL>; +defm : DemangledNativeBuiltin<"__spirv_Subgroup2DBlockStoreINTEL", OpenCL_std, Subgroup2DBlock, 10, 10, OpSubgroup2DBlockStoreINTEL>; + //===----------------------------------------------------------------------===// // Class defining a work/sub group builtin that should be translated into a // SPIR-V instruction using the defined properties. 
diff --git a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp index 56cbd9414c9ee..e6cb8cee66a60 100644 --- a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp @@ -97,7 +97,9 @@ static const std::map> SPIRV::Extension::Extension:: SPV_INTEL_subgroup_matrix_multiply_accumulate}, {"SPV_INTEL_ternary_bitwise_function", - SPIRV::Extension::Extension::SPV_INTEL_ternary_bitwise_function}}; + SPIRV::Extension::Extension::SPV_INTEL_ternary_bitwise_function}, + {"SPV_INTEL_2d_block_io", + SPIRV::Extension::Extension::SPV_INTEL_2d_block_io}}; bool SPIRVExtensionsParser::parse(cl::Option &O, StringRef ArgName, StringRef ArgValue, diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td index 6d8c84945d7d4..e66a1a5d05f73 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td +++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td @@ -936,3 +936,20 @@ def OpAliasScopeListDeclINTEL: Op<5913, (outs ID:$res), (ins variable_ops), // SPV_INTEL_ternary_bitwise_function def OpBitwiseFunctionINTEL: Op<6242, (outs ID:$res), (ins TYPE:$type, ID:$a, ID:$b, ID:$c, ID:$lut_index), "$res = OpBitwiseFunctionINTEL $type $a $b $c $lut_index">; + +// SPV_INTEL_2d_block_io +def OpSubgroup2DBlockLoadINTEL: Op<6231, (outs), + (ins ID:$elementSize, ID:$blockWidth, ID:$blockHeight, ID:$blockCount, ID:$srcBasePointer, ID:$memoryWidth, ID:$memoryHeight, ID:$memoryPitch, ID:$coordinate, ID:$dstPointer), + "OpSubgroup2DBlockLoadINTEL $elementSize $blockWidth $blockHeight $blockCount $srcBasePointer $memoryWidth $memoryHeight $memoryPitch $coordinate $dstPointer">; +def OpSubgroup2DBlockLoadTransposeINTEL: Op<6232, (outs), + (ins ID:$elementSize, ID:$blockWidth, ID:$blockHeight, ID:$blockCount, ID:$srcBasePointer, ID:$memoryWidth, ID:$memoryHeight, ID:$memoryPitch, ID:$coordinate, ID:$dstPointer), + "OpSubgroup2DBlockLoadTransposeINTEL $elementSize $blockWidth $blockHeight $blockCount $srcBasePointer 
$memoryWidth $memoryHeight $memoryPitch $coordinate $dstPointer">; +def OpSubgroup2DBlockLoadTransformINTEL: Op<6233, (outs), + (ins ID:$elementSize, ID:$blockWidth, ID:$blockHeight, ID:$blockCount, ID:$srcBasePointer, ID:$memoryWidth, ID:$memoryHeight, ID:$memoryPitch, ID:$coordinate, ID:$dstPointer), + "OpSubgroup2DBlockLoadTransformINTEL $elementSize $blockWidth $blockHeight $blockCount $srcBasePointer $memoryWidth $memoryHeight $memoryPitch $coordinate $dstPointer">; +def OpSubgroup2DBlockPrefetchINTEL: Op<6234, (outs), + (ins ID:$elementSize, ID:$blockWidth, ID:$blockHeight, ID:$blockCount, ID:$srcBasePointer, ID:$memoryWidth, ID:$memoryHeight, ID:$memoryPitch, ID:$coordinate), + "OpSubgroup2DBlockPrefetchINTEL $elementSize $blockWidth $blockHeight $blockCount $srcBasePointer $memoryWidth $memoryHeight $memoryPitch $coordinate">; +def OpSubgroup2DBlockStoreINTEL: Op<6235, (outs), + (ins ID:$elementSize, ID:$blockWidth, ID:$blockHeight, ID:$blockCount, ID:$srcPointer, ID:$dstBasePointer, ID:$memoryWidth, ID:$memoryHeight, ID:$memoryPitch, ID:$coordinate), + "OpSubgroup2DBlockStoreINTEL $elementSize $blockWidth $blockHeight $blockCount $srcPointer $dstBasePointer $memoryWidth $memoryHeight $memoryPitch $coordinate">; diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp index 6d2ecd563d200..e63ea3a3f1b57 100644 --- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp @@ -1544,6 +1544,39 @@ void addInstrRequirements(const MachineInstr &MI, Reqs.addCapability(SPIRV::Capability::FunctionPointersINTEL); } break; + + case SPIRV::OpSubgroup2DBlockLoadINTEL: + case SPIRV::OpSubgroup2DBlockPrefetchINTEL: + case SPIRV::OpSubgroup2DBlockStoreINTEL: { + if (!ST.canUseExtension(SPIRV::Extension::SPV_INTEL_2d_block_io)) + report_fatal_error( + "OpSubgroup2DBlock[Load/Prefetch/Store]INTEL instructions require " + "the following SPIR-V extension: SPV_INTEL_2d_block_io", + false); + 
Reqs.addExtension(SPIRV::Extension::SPV_INTEL_2d_block_io); + Reqs.addCapability(SPIRV::Capability::Subgroup2DBlockIOINTEL); + break; + } + case SPIRV::OpSubgroup2DBlockLoadTransformINTEL: { + if (!ST.canUseExtension(SPIRV::Extension::SPV_INTEL_2d_block_io)) + report_fatal_error( + "OpSubgroup2DBlockLoadTransformINTEL instruction requires the " + "following SPIR-V extension: SPV_INTEL_2d_block_io", + false); + Reqs.addExtension(SPIRV::Extension::SPV_INTEL_2d_block_io); + Reqs.addCapability(SPIRV::Capability::Subgroup2DBlockTransformINTEL); + break; + } + case SPIRV::OpSubgroup2DBlockLoadTransposeINTEL: { + if (!ST.canUseExtension(SPIRV::Extension::SPV_INTEL_2d_block_io)) + report_fatal_error( + "OpSubgroup2DBlockLoadTransposeINTEL instruction requires the " + "following SPIR-V extension: SPV_INTEL_2d_block_io", + false); + Reqs.addExtension(SPIRV::Extension::SPV_INTEL_2d_block_io); + Reqs.addCapability(SPIRV::Capability::Subgroup2DBlockTransposeINTEL); + break; + } case SPIRV::OpAtomicFAddEXT: case SPIRV::OpAtomicFMinEXT: case SPIRV::OpAtomicFMaxEXT: diff --git a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td index cc32200a0a261..36e156299e923 100644 --- a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td +++ b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td @@ -315,6 +315,7 @@ defm SPV_INTEL_memory_access_aliasing : ExtensionOperand<118>; defm SPV_INTEL_fp_max_error : ExtensionOperand<119>; defm SPV_INTEL_ternary_bitwise_function : ExtensionOperand<120>; defm SPV_INTEL_subgroup_matrix_multiply_accumulate : ExtensionOperand<121>; +defm SPV_INTEL_2d_block_io : ExtensionOperand<122>; //===----------------------------------------------------------------------===// // Multiclass used to define Capabilities enum values and at the same time @@ -517,6 +518,9 @@ defm MemoryAccessAliasingINTEL : CapabilityOperand<5910, 0, 0, [SPV_INTEL_memory defm FPMaxErrorINTEL : CapabilityOperand<6169, 0, 0, [SPV_INTEL_fp_max_error], []>; defm 
TernaryBitwiseFunctionINTEL : CapabilityOperand<6241, 0, 0, [SPV_INTEL_ternary_bitwise_function], []>; defm SubgroupMatrixMultiplyAccumulateINTEL : CapabilityOperand<6236, 0, 0, [SPV_INTEL_subgroup_matrix_multiply_accumulate], []>; +defm Subgroup2DBlockIOINTEL : CapabilityOperand<6228, 0, 0, [SPV_INTEL_2d_block_io], []>; +defm Subgroup2DBlockTransformINTEL : CapabilityOperand<6229, 0, 0, [SPV_INTEL_2d_block_io], []>; +defm Subgroup2DBlockTransposeINTEL : CapabilityOperand<6230, 0, 0, [SPV_INTEL_2d_block_io], []>; //===----------------------------------------------------------------------===// // Multiclass used to define SourceLanguage enum values and at the same time diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_2d_block_io/2d_block_io_generic.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_2d_block_io/2d_block_io_generic.ll new file mode 100644 index 0000000000000..67f999571694b --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_2d_block_io/2d_block_io_generic.ll @@ -0,0 +1,74 @@ +; Generated with: +; source.cl: +; void __spirv_Subgroup2DBlockLoadINTEL( int element_size, int block_width, int block_height, int block_count, const __global void* src_base_pointer, int memory_width, int memory_height, int memory_pitch, int2 coordinate, private void* dst_pointer); +; void __spirv_Subgroup2DBlockLoadTransposeINTEL(int element_size, int block_width, int block_height, int block_count, const __global void* src_base_pointer, int memory_width, int memory_height, int memory_pitch, int2 coordinate, private void* dst_pointer); +; void __spirv_Subgroup2DBlockLoadTransformINTEL(int element_size, int block_width, int block_height, int block_count, const __global void* src_base_pointer, int memory_width, int memory_height, int memory_pitch, int2 coordinate, private void* dst_pointer); +; void __spirv_Subgroup2DBlockPrefetchINTEL( int element_size, int block_width, int block_height, int block_count, const __global void* src_base_pointer, int memory_width, 
int memory_height, int memory_pitch, int2 coordinate ); +; void __spirv_Subgroup2DBlockStoreINTEL( int element_size, int block_width, int block_height, int block_count, const private void* src_pointer, __global void* dst_base_pointer, int memory_width, int memory_height, int memory_pitch, int2 coordinate ); +; +; void foo(const __global void* base_address, __global void* dst_base_pointer, int width, int height, int pitch, int2 coord, private void* dst_pointer, const private void* src_pointer) { +; const int i = 42; +; __spirv_Subgroup2DBlockLoadINTEL(i, i, i, i, base_address, width, height, pitch, coord, dst_pointer); +; __spirv_Subgroup2DBlockLoadTransformINTEL(i, i, i, i, base_address, width, height, pitch, coord, dst_pointer); +; __spirv_Subgroup2DBlockLoadTransposeINTEL(i, i, i, i, base_address, width, height, pitch, coord, dst_pointer); +; __spirv_Subgroup2DBlockPrefetchINTEL(i, i, i, i, base_address, width, height, pitch, coord); +; __spirv_Subgroup2DBlockStoreINTEL(i, i, i, i, src_pointer, dst_base_pointer, width, height, pitch, coord); +; } +; clang -cc1 -cl-std=clc++2021 -triple spir64-unknown-unknown -emit-llvm -finclude-default-header source.cl -o tmp.ll + + + +; RUN: llc -verify-machineinstrs -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_INTEL_2d_block_io %s -o %t.spt +; RUN: FileCheck %s --input-file=%t.spt +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_INTEL_2d_block_io %s -o - -filetype=obj | spirv-val %} + +; CHECK: OpCapability Subgroup2DBlockIOINTEL +; CHECK: OpCapability Subgroup2DBlockTransformINTEL +; CHECK: OpCapability Subgroup2DBlockTransposeINTEL +; CHECK: OpExtension "SPV_INTEL_2d_block_io" +; CHECK: %[[#Int8Ty:]] = OpTypeInt 8 0 +; CHECK: %[[#GlbPtrTy:]] = OpTypePointer CrossWorkgroup %[[#Int8Ty]] +; CHECK: %[[#Int32Ty:]] = OpTypeInt 32 0 +; CHECK: %[[#VectorTy:]] = OpTypeVector %[[#Int32Ty]] 2 +; CHECK: %[[#PrvPtrTy:]] = OpTypePointer Function %[[#Int8Ty]] +; CHECK: %[[#VoidTy:]] = OpTypeVoid 
+; CHECK: %[[#Const42:]] = OpConstant %[[#Int32Ty]] 42 +; CHECK: %[[#BaseSrc:]] = OpFunctionParameter %[[#GlbPtrTy]] +; CHECK: %[[#BaseDst:]] = OpFunctionParameter %[[#GlbPtrTy]] +; CHECK: %[[#Width:]] = OpFunctionParameter %[[#Int32Ty]] +; CHECK: %[[#Height:]] = OpFunctionParameter %[[#Int32Ty]] +; CHECK: %[[#Pitch:]] = OpFunctionParameter %[[#Int32Ty]] +; CHECK: %[[#Coord:]] = OpFunctionParameter %[[#VectorTy]] +; CHECK: %[[#Dst:]] = OpFunctionParameter %[[#PrvPtrTy]] +; CHECK: %[[#Src:]] = OpFunctionParameter %[[#PrvPtrTy]] +; CHECK: OpSubgroup2DBlockLoadINTEL %[[#Const42]] %[[#Const42]] %[[#Const42]] %[[#Const42]] %[[#BaseSrc]] %[[#Width]] %[[#Height]] %[[#Pitch]] %[[#Coord]] %[[#Dst]] +; CHECK: OpSubgroup2DBlockLoadTransformINTEL %[[#Const42]] %[[#Const42]] %[[#Const42]] %[[#Const42]] %[[#BaseSrc]] %[[#Width]] %[[#Height]] %[[#Pitch]] %[[#Coord]] %[[#Dst]] +; CHECK: OpSubgroup2DBlockLoadTransposeINTEL %[[#Const42]] %[[#Const42]] %[[#Const42]] %[[#Const42]] %[[#BaseSrc]] %[[#Width]] %[[#Height]] %[[#Pitch]] %[[#Coord]] %[[#Dst]] +; CHECK: OpSubgroup2DBlockPrefetchINTEL %[[#Const42]] %[[#Const42]] %[[#Const42]] %[[#Const42]] %[[#BaseSrc]] %[[#Width]] %[[#Height]] %[[#Pitch]] %[[#Coord]] +; CHECK: OpSubgroup2DBlockStoreINTEL %[[#Const42]] %[[#Const42]] %[[#Const42]] %[[#Const42]] %[[#Src]] %[[#BaseDst]] %[[#Width]] %[[#Height]] %[[#Pitch]] %[[#Coord]] + + + +define spir_func void @foo(ptr addrspace(1) %base_address, ptr addrspace(1) %dst_base_pointer, i32 %width, i32 %height, i32 %pitch, <2 x i32> %coord, ptr %dst_pointer, ptr %src_pointer) { +entry: + call spir_func void @_Z32__spirv_Subgroup2DBlockLoadINTELiiiiPU3AS1KviiiDv2_iPv(i32 42, i32 42, i32 42, i32 42, ptr addrspace(1) %base_address, i32 %width, i32 %height, i32 %pitch, <2 x i32> %coord, ptr %dst_pointer) + call spir_func void @_Z41__spirv_Subgroup2DBlockLoadTransformINTELiiiiPU3AS1KviiiDv2_iPv(i32 42, i32 42, i32 42, i32 42, ptr addrspace(1) %base_address, i32 %width, i32 %height, i32 %pitch, <2 x i32> 
%coord, ptr %dst_pointer) + call spir_func void @_Z41__spirv_Subgroup2DBlockLoadTransposeINTELiiiiPU3AS1KviiiDv2_iPv(i32 42, i32 42, i32 42, i32 42, ptr addrspace(1) %base_address, i32 %width, i32 %height, i32 %pitch, <2 x i32> %coord, ptr %dst_pointer) + call spir_func void @_Z36__spirv_Subgroup2DBlockPrefetchINTELiiiiPU3AS1KviiiDv2_i(i32 42, i32 42, i32 42, i32 42, ptr addrspace(1) %base_address, i32 %width, i32 %height, i32 %pitch, <2 x i32> %coord) + call spir_func void @_Z33__spirv_Subgroup2DBlockStoreINTELiiiiPKvPU3AS1viiiDv2_i(i32 42, i32 42, i32 42, i32 42, ptr %src_pointer, ptr addrspace(1) %dst_base_pointer, i32 %width, i32 %height, i32 %pitch, <2 x i32> %coord) + ret void +} + +declare spir_func void @_Z32__spirv_Subgroup2DBlockLoadINTELiiiiPU3AS1KviiiDv2_iPv(i32, i32, i32, i32, ptr addrspace(1), i32, i32, i32, <2 x i32>, ptr) +declare spir_func void @_Z41__spirv_Subgroup2DBlockLoadTransformINTELiiiiPU3AS1KviiiDv2_iPv(i32, i32, i32, i32, ptr addrspace(1), i32, i32, i32, <2 x i32>, ptr) +declare spir_func void @_Z41__spirv_Subgroup2DBlockLoadTransposeINTELiiiiPU3AS1KviiiDv2_iPv(i32, i32, i32, i32, ptr addrspace(1), i32, i32, i32, <2 x i32>, ptr) +declare spir_func void @_Z36__spirv_Subgroup2DBlockPrefetchINTELiiiiPU3AS1KviiiDv2_i(i32, i32, i32, i32, ptr addrspace(1), i32, i32, i32, <2 x i32>) +declare spir_func void @_Z33__spirv_Subgroup2DBlockStoreINTELiiiiPKvPU3AS1viiiDv2_i(i32, i32, i32, i32, ptr, ptr addrspace(1), i32, i32, i32, <2 x i32>) + +!opencl.spir.version = !{!0} +!spirv.Source = !{!1} +!llvm.ident = !{!2} + +!0 = !{i32 1, i32 0} +!1 = !{i32 4, i32 100000} +!2 = !{!"clang version 17.0.0"}