From 50ac772a9561462a74ea537df0ef4d2d2a657c29 Mon Sep 17 00:00:00 2001 From: "Lee, Sang Ik" Date: Tue, 30 Sep 2025 21:06:03 +0000 Subject: [PATCH 1/3] [MLIR][XeVM] Add XeVM 1D block operations to OpenCL calls conversion. --- mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp | 85 ++++++++++++++++++- .../Conversion/XeVMToLLVM/xevm-to-llvm.mlir | 56 ++++++++++++ 2 files changed, 140 insertions(+), 1 deletion(-) diff --git a/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp b/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp index 0f90acf0d9c39..f10ca5a80fa04 100644 --- a/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp +++ b/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp @@ -216,6 +216,10 @@ static std::optional getCacheControl(BlockLoad2dOp op) { return op.getCacheControl(); } +static std::optional getCacheControl(BlockLoadOp op) { + return op.getCacheControl(); +} + static std::optional getCacheControl(BlockPrefetch2dOp op) { return op.getCacheControl(); } @@ -224,6 +228,10 @@ static std::optional getCacheControl(BlockStore2dOp op) { return op.getCacheControl(); } +static std::optional getCacheControl(BlockStoreOp op) { + return op.getCacheControl(); +} + static std::optional getCacheControl(LLVM::LoadOp op) { if (op->hasAttr("cache_control")) { auto attr = op->getAttrOfType("cache_control"); @@ -265,6 +273,7 @@ getCacheControlMetadata(ConversionPatternRewriter &rewriter, OpType op) { constexpr bool isLoad = std::is_same_v || std::is_same_v || std::is_same_v || + std::is_same_v || std::is_same_v; const int32_t controlKey{isLoad ? loadCacheControlKey : storeCacheControlKey}; SmallVector decorationsL1{ @@ -620,6 +629,77 @@ class LoadStorePrefetchToOCLPattern : public OpConversionPattern { return success(); } }; + +template +class BlockLoadStore1DToOCLPattern : public OpConversionPattern { + using OpConversionPattern::OpConversionPattern; + LogicalResult + matchAndRewrite(OpType op, typename OpType::Adaptor adaptor, + ConversionPatternRewriter &rewriter) const override { + constexpr bool isStore = std::is_same_v; + // Get OpenCL function name + // https://registry.khronos.org/OpenCL/extensions/ + // intel/cl_intel_subgroup_local_block_io.html + std::string funcName{"intel_sub_group_block_"}; + funcName += isStore ? "write_u" : "read_u"; + VectorType vecType; + if constexpr (isStore) + vecType = op.getVal().getType(); + else + vecType = op.getType(); + Type elemType = vecType.getElementType(); + funcName += getTypeMangling(elemType); + if (vecType.getNumElements() > 1) + funcName += std::to_string(vecType.getNumElements()); + SmallVector argTypes{}; + // XeVM BlockLoad/StoreOp always use signless integer types + // but OpenCL builtins expect unsigned types + // use unsigned types for mangling + SmallVector isUnsigned{}; + // arg0: pointer to the src/dst address + // arg1 - only if store : vector to store + // Prepare arguments + SmallVector args{}; + args.push_back(op.getPtr()); + argTypes.push_back(op.getPtr().getType()); + isUnsigned.push_back(true); + Type retType; + if constexpr (isStore) { + args.push_back(op.getVal()); + argTypes.push_back(op.getVal().getType()); + isUnsigned.push_back(true); + retType = LLVM::LLVMVoidType::get(rewriter.getContext()); + } else { + /* + retType = VectorType::get(vecType.getShape(), + rewriter.getIntegerType(elemType.getIntOrFloatBitWidth(), + false)); + */ + retType = vecType; + } + funcName = std::string("_Z") + std::to_string(funcName.size()) + funcName + + "PU3AS" + + std::to_string(op.getPtr().getType().getAddressSpace()); + funcName += getTypeMangling(elemType, /*isUnsigned=*/true); + if constexpr (isStore) + funcName += getTypeMangling(vecType, /*isUnsigned=*/true); + LLVMFuncAttributeOptions funcAttr{noUnwindWillReturnAttrs}; + + LLVM::CallOp call = + createDeviceFunctionCall(rewriter, funcName, retType, argTypes, args, + {}, funcAttr, op.getOperation()); + if (std::optional optCacheControls = + getCacheControlMetadata(rewriter, op)) { + call->setAttr(XeVMDialect::getCacheControlsAttrName(), *optCacheControls); + } + if constexpr (isStore) + rewriter.eraseOp(op); + else + rewriter.replaceOp(op, call->getResult(0)); + return success(); + } +}; + template class LLVMLoadStoreToOCLPattern : public OpConversionPattern { using OpConversionPattern::OpConversionPattern; @@ -695,7 +775,10 @@ void ::mlir::populateXeVMToLLVMConversionPatterns(ConversionTarget &target, LoadStorePrefetchToOCLPattern, MMAToOCLPattern, MemfenceToOCLPattern, PrefetchToOCLPattern, LLVMLoadStoreToOCLPattern, - LLVMLoadStoreToOCLPattern>(patterns.getContext()); + LLVMLoadStoreToOCLPattern, + BlockLoadStore1DToOCLPattern, + BlockLoadStore1DToOCLPattern>( + patterns.getContext()); } void ::mlir::registerConvertXeVMToLLVMInterface(DialectRegistry ®istry) { diff --git a/mlir/test/Conversion/XeVMToLLVM/xevm-to-llvm.mlir b/mlir/test/Conversion/XeVMToLLVM/xevm-to-llvm.mlir index 8f60a0797652b..c70d9f4032ae2 100644 --- a/mlir/test/Conversion/XeVMToLLVM/xevm-to-llvm.mlir +++ b/mlir/test/Conversion/XeVMToLLVM/xevm-to-llvm.mlir @@ -261,3 +261,59 @@ llvm.func @llvm.store(%a: !llvm.ptr<1>, %val: i32) { llvm.store %val, %a {cache_control=#xevm.store_cache_control} : i32, !llvm.ptr<1> llvm.return } + +// ----- +// CHECK-LABEL: llvm.func spir_funccc @_Z30intel_sub_group_block_read_us8PU3AS1t +// CHECK: llvm.func @blockload_as1(%[[ARG0:.*]]: !llvm.ptr<1>) +llvm.func @blockload_as1(%ptr: !llvm.ptr<1>) -> vector<8xi16> { + // CHECK: %[[VAR0:.*]] = llvm.call spir_funccc @_Z30intel_sub_group_block_read_us8PU3AS1t(%[[ARG0]]) + // CHECK-SAME: {function_type = !llvm.func (ptr<1>)>, linkage = #llvm.linkage, + // CHECK-SAME: no_unwind, sym_name = "_Z30intel_sub_group_block_read_us8PU3AS1t", + // CHECK-SAME: visibility_ = 0 : i64, will_return, xevm.DecorationCacheControl = + // CHECK-SAME: [6442 : i32, 0 : i32, 1 : i32, 0 : i32], + // CHECK-SAME: [6442 : i32, 1 : i32, 1 : i32, 0 : i32] + %loaded_a = xevm.blockload %ptr <{cache_control=#xevm.load_cache_control}> : (!llvm.ptr<1>) -> vector<8xi16> + llvm.return %loaded_a : vector<8xi16> +} + +// ----- +// CHECK-LABEL: llvm.func spir_funccc @_Z31intel_sub_group_block_read_uc16PU3AS3h(!llvm.ptr<3>) +// CHECK: llvm.func @blockload_as3(%[[ARG0:.*]]: !llvm.ptr<3>) +llvm.func @blockload_as3(%ptr: !llvm.ptr<3>) -> vector<16xi8> { + // CHECK: %[[VAR0:.*]] = llvm.call spir_funccc @_Z31intel_sub_group_block_read_uc16PU3AS3h(%[[ARG0]]) + // CHECK-SAME: {function_type = !llvm.func (ptr<3>)>, linkage = #llvm.linkage, + // CHECK-SAME: no_unwind, sym_name = "_Z31intel_sub_group_block_read_uc16PU3AS3h", visibility_ = 0 : i64, + // CHECK-SAME: will_return, xevm.DecorationCacheControl = + // CHECK-SAME: [6442 : i32, 0 : i32, 1 : i32, 0 : i32], + // CHECK-SAME: [6442 : i32, 1 : i32, 1 : i32, 0 : i32] + %loaded_a = xevm.blockload %ptr <{cache_control=#xevm.load_cache_control}> : (!llvm.ptr<3>) -> vector<16xi8> + llvm.return %loaded_a : vector<16xi8> +} + +// ----- +// CHECK-LABEL: llvm.func spir_funccc @_Z31intel_sub_group_block_write_ui8PU3AS1jDv8_j +// CHECK: llvm.func @blockstore_as1(%[[ARG0:.*]]: !llvm.ptr<1>, %[[ARG1:.*]]: vector<8xi32>) { +llvm.func @blockstore_as1(%ptr: !llvm.ptr<1>, %data: vector<8xi32>) { + // CHECK: llvm.call spir_funccc @_Z31intel_sub_group_block_write_ui8PU3AS1jDv8_j(%[[ARG0]], %[[ARG1]]) + // CHECK-SAME: {function_type = !llvm.func, vector<8xi32>)>, linkage = #llvm.linkage, + // CHECK-SAME: no_unwind, sym_name = "_Z31intel_sub_group_block_write_ui8PU3AS1jDv8_j", visibility_ = 0 : i64, + // CHECK-SAME: will_return, xevm.DecorationCacheControl = + // CHECK-SAME: [6443 : i32, 0 : i32, 2 : i32, 0 : i32], + // CHECK-SAME: [6443 : i32, 1 : i32, 2 : i32, 0 : i32] + xevm.blockstore %ptr, %data <{cache_control=#xevm.store_cache_control}> : (!llvm.ptr<1>, vector<8xi32>) + llvm.return +} + +// ----- +// CHECK-LABEL: llvm.func spir_funccc @_Z31intel_sub_group_block_write_ul2PU3AS3mDv2_m +// CHECK: llvm.func @blockstore_as3(%[[ARG0:.*]]: !llvm.ptr<3>, %[[ARG1:.*]]: vector<2xi64>) { +llvm.func @blockstore_as3(%ptr: !llvm.ptr<3>, %data: vector<2xi64>) { + // CHECK: llvm.call spir_funccc @_Z31intel_sub_group_block_write_ul2PU3AS3mDv2_m(%[[ARG0]], %[[ARG1]]) + // CHECK-SAME: {function_type = !llvm.func, vector<2xi64>)>, linkage = #llvm.linkage, + // CHECK-SAME: no_unwind, sym_name = "_Z31intel_sub_group_block_write_ul2PU3AS3mDv2_m", visibility_ = 0 : i64, + // CHECK-SAME: will_return, xevm.DecorationCacheControl = + // CHECK-SAME: [6443 : i32, 0 : i32, 2 : i32, 0 : i32], + // CHECK-SAME: [6443 : i32, 1 : i32, 2 : i32, 0 : i32] + xevm.blockstore %ptr, %data <{cache_control=#xevm.store_cache_control}> : (!llvm.ptr<3>, vector<2xi64>) + llvm.return +} From 986761f736adbd79e25257cd4e5a1394a7d9c5a1 Mon Sep 17 00:00:00 2001 From: "Lee, Sang Ik" Date: Tue, 7 Oct 2025 18:09:49 +0000 Subject: [PATCH 2/3] Update code in anticipation of vector or scalar value or result type. --- mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp b/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp index f10ca5a80fa04..a703cc8015c8c 100644 --- a/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp +++ b/mlir/lib/Conversion/XeVMToLLVM/XeVMToLLVM.cpp @@ -641,16 +641,21 @@ class BlockLoadStore1DToOCLPattern : public OpConversionPattern { // https://registry.khronos.org/OpenCL/extensions/ // intel/cl_intel_subgroup_local_block_io.html std::string funcName{"intel_sub_group_block_"}; - funcName += isStore ? "write_u" : "read_u"; - VectorType vecType; - if constexpr (isStore) - vecType = op.getVal().getType(); - else - vecType = op.getType(); - Type elemType = vecType.getElementType(); + // Value or Result type can be vector or scalar + Type valOrResTy; + if constexpr (isStore) { + funcName += "write_u"; + valOrResTy = op.getVal().getType(); + } else { + funcName += "read_u"; + valOrResTy = op.getType(); + } + // Get element type of the vector/scalar + VectorType vecTy = dyn_cast(valOrResTy); + Type elemType = vecTy ? vecTy.getElementType() : valOrResTy; funcName += getTypeMangling(elemType); - if (vecType.getNumElements() > 1) - funcName += std::to_string(vecType.getNumElements()); + if (vecTy) + funcName += std::to_string(vecTy.getNumElements()); SmallVector argTypes{}; // XeVM BlockLoad/StoreOp always use signless integer types // but OpenCL builtins expect unsigned types @@ -670,19 +675,14 @@ class BlockLoadStore1DToOCLPattern : public OpConversionPattern { isUnsigned.push_back(true); retType = LLVM::LLVMVoidType::get(rewriter.getContext()); } else { - /* - retType = VectorType::get(vecType.getShape(), - rewriter.getIntegerType(elemType.getIntOrFloatBitWidth(), - false)); - */ - retType = vecType; + retType = valOrResTy; } funcName = std::string("_Z") + std::to_string(funcName.size()) + funcName + "PU3AS" + std::to_string(op.getPtr().getType().getAddressSpace()); funcName += getTypeMangling(elemType, /*isUnsigned=*/true); if constexpr (isStore) - funcName += getTypeMangling(vecType, /*isUnsigned=*/true); + funcName += getTypeMangling(valOrResTy, /*isUnsigned=*/true); LLVMFuncAttributeOptions funcAttr{noUnwindWillReturnAttrs}; LLVM::CallOp call = From 0c7eae4b6e62e832ac34ed1029428bba11c60ce8 Mon Sep 17 00:00:00 2001 From: "Lee, Sang Ik" Date: Tue, 7 Oct 2025 20:56:19 +0000 Subject: [PATCH 3/3] Add tests for scalar value store and scalar result load. --- .../Conversion/XeVMToLLVM/xevm-to-llvm.mlir | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/mlir/test/Conversion/XeVMToLLVM/xevm-to-llvm.mlir b/mlir/test/Conversion/XeVMToLLVM/xevm-to-llvm.mlir index c70d9f4032ae2..b31a973ffd6a1 100644 --- a/mlir/test/Conversion/XeVMToLLVM/xevm-to-llvm.mlir +++ b/mlir/test/Conversion/XeVMToLLVM/xevm-to-llvm.mlir @@ -290,6 +290,20 @@ llvm.func @blockload_as3(%ptr: !llvm.ptr<3>) -> vector<16xi8> { llvm.return %loaded_a : vector<16xi8> } +// ----- +// CHECK-LABEL: llvm.func spir_funccc @_Z29intel_sub_group_block_read_ucPU3AS3h(!llvm.ptr<3>) +// CHECK: llvm.func @blockload_scalar(%[[ARG0:.*]]: !llvm.ptr<3>) +llvm.func @blockload_scalar(%ptr: !llvm.ptr<3>) -> i8 { + // CHECK: %[[VAR0:.*]] = llvm.call spir_funccc @_Z29intel_sub_group_block_read_ucPU3AS3h(%[[ARG0]]) + // CHECK-SAME: {function_type = !llvm.func)>, linkage = #llvm.linkage, + // CHECK-SAME: no_unwind, sym_name = "_Z29intel_sub_group_block_read_ucPU3AS3h", visibility_ = 0 : i64, + // CHECK-SAME: will_return, xevm.DecorationCacheControl = + // CHECK-SAME: [6442 : i32, 0 : i32, 1 : i32, 0 : i32], + // CHECK-SAME: [6442 : i32, 1 : i32, 1 : i32, 0 : i32] + %loaded_a = xevm.blockload %ptr <{cache_control=#xevm.load_cache_control}> : (!llvm.ptr<3>) -> i8 + llvm.return %loaded_a : i8 +} + // ----- // CHECK-LABEL: llvm.func spir_funccc @_Z31intel_sub_group_block_write_ui8PU3AS1jDv8_j // CHECK: llvm.func @blockstore_as1(%[[ARG0:.*]]: !llvm.ptr<1>, %[[ARG1:.*]]: vector<8xi32>) { @@ -317,3 +331,17 @@ llvm.func @blockstore_as3(%ptr: !llvm.ptr<3>, %data: vector<2xi64>) { xevm.blockstore %ptr, %data <{cache_control=#xevm.store_cache_control}> : (!llvm.ptr<3>, vector<2xi64>) llvm.return } + +// ----- +// CHECK-LABEL: llvm.func spir_funccc @_Z30intel_sub_group_block_write_ulPU3AS3mm +// CHECK: llvm.func @blockstore_scalar(%[[ARG0:.*]]: !llvm.ptr<3>, %[[ARG1:.*]]: i64) { +llvm.func @blockstore_scalar(%ptr: !llvm.ptr<3>, %data: i64) { + // CHECK: llvm.call spir_funccc @_Z30intel_sub_group_block_write_ulPU3AS3mm(%[[ARG0]], %[[ARG1]]) + // CHECK-SAME: {function_type = !llvm.func, i64)>, linkage = #llvm.linkage, + // CHECK-SAME: no_unwind, sym_name = "_Z30intel_sub_group_block_write_ulPU3AS3mm", visibility_ = 0 : i64, + // CHECK-SAME: will_return, xevm.DecorationCacheControl = + // CHECK-SAME: [6443 : i32, 0 : i32, 2 : i32, 0 : i32], + // CHECK-SAME: [6443 : i32, 1 : i32, 2 : i32, 0 : i32] + xevm.blockstore %ptr, %data <{cache_control=#xevm.store_cache_control}> : (!llvm.ptr<3>, i64) + llvm.return +}