diff --git a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td index 7e142b20c0894..b13f5a9f2c9d9 100644 --- a/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td +++ b/mlir/include/mlir/Dialect/XeGPU/IR/XeGPUTypes.td @@ -82,7 +82,7 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc", static-dim-list ::= decimal-literal `x` decimal-literal attr-list = (, encoding-attr)? (, layout-attr)? enconding-attr = (, memory_space = value)? (, arr_len = value)? (, boundary_check = value)? (, scattered = value)? - layout-attr = (, layout `<`sg_layout = value, sg_data = value, inst_data = value, lane_layout = value, lane_data = value, order = value`>`)? + layout-attr = DistributeLayoutAttr ``` Examples: @@ -158,8 +158,8 @@ def XeGPU_TensorDesc: XeGPUTypeDef<"TensorDesc", "tensor_desc", return llvm::dyn_cast_if_present(getEncoding()); } - LayoutAttr getLayoutAttr() const { - return llvm::dyn_cast_if_present(getLayout()); + DistributeLayoutAttr getLayoutAttr() const { + return llvm::dyn_cast_if_present(getLayout()); } xegpu::MemorySpace getMemorySpace() const { diff --git a/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h b/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h index 0aa2cd45088f3..1b594f17e15ec 100644 --- a/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h +++ b/mlir/include/mlir/Dialect/XeGPU/Utils/XeGPUUtils.h @@ -219,10 +219,11 @@ void setTemporaryLayout(const T &operandOrResult, /// Helper function to check if the layout is packed. Layout is packed if it is /// 2D and lane_data[0] != 1 (data packed from col dimension). /// TODO: Move to target info. -bool requirePacked(const LayoutAttr layout); +bool requirePacked(const DistributeLayoutAttr layout); /// Helper function to check if the layout requires a transpose effect. -bool requireTranspose(const LayoutAttr layout, const uArch::uArch *uArch); +bool requireTranspose(const DistributeLayoutAttr layout, + const uArch::uArch *uArch); // Check if dst shape is an expansion of src shape by inserting unit dimensions. bool matchUnitDimExpansion(ArrayRef src, ArrayRef dst, diff --git a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp index 950371e17255f..64c56b5adf5d7 100644 --- a/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp +++ b/mlir/lib/Dialect/XeGPU/IR/XeGPUDialect.cpp @@ -1318,7 +1318,7 @@ mlir::Type TensorDescType::parse(AsmParser &parser) { mlir::Attribute attr; ParseResult res = parser.parseAttribute(attr); if (mlir::succeeded(res)) { - if (mlir::isa(attr)) { + if (mlir::isa(attr)) { layout = attr; continue; } diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp index 1ee0bc6ad9507..ef6a494b76638 100644 --- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp +++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUBlocking.cpp @@ -270,12 +270,11 @@ void XeGPUBlockingPass::runOnOperation() { } auto getTileShapeAndCount = [](llvm::ArrayRef shape, - xegpu::LayoutAttr layout) { + xegpu::DistributeLayoutAttr layout) { int count = 1; SmallVector tileShape(shape); - if (layout && layout.getInstData()) { - DenseI32ArrayAttr instData = layout.getInstData(); - tileShape = llvm::to_vector_of(instData.asArrayRef()); + if (layout && !layout.getEffectiveInstDataAsInt().empty()) { + tileShape = layout.getEffectiveInstDataAsInt(); count = computeProduct(shape) / computeProduct(tileShape); } return std::make_pair(tileShape, count); @@ -308,7 +307,7 @@ void XeGPUBlockingPass::runOnOperation() { Type elemTy = type.getElementType(); ArrayRef shape = type.getShape(); - xegpu::LayoutAttr layout = type.getLayoutAttr(); + xegpu::DistributeLayoutAttr layout = type.getLayoutAttr(); if (layout && layout.isForWorkgroup()) return failure(); @@ -348,9 +347,9 @@ void XeGPUBlockingPass::runOnOperation() { if (chunkSize > 1) { int64_t blockedChunkSize = chunkSize; - auto instData = tdescTy.getLayoutAttr().getInstData(); + auto instData = tdescTy.getLayoutAttr().getEffectiveInstDataAsInt(); if (!instData.empty()) - blockedChunkSize = instData.asArrayRef().back(); + blockedChunkSize = instData.back(); // To create a new attribute with a different chunk_size: auto newEncoding = xegpu::ScatterTensorDescAttr::get( diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPeepHoleOptimizer.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPeepHoleOptimizer.cpp index 0c7977bb241df..3496756e8a6d3 100644 --- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPeepHoleOptimizer.cpp +++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUPeepHoleOptimizer.cpp @@ -145,10 +145,17 @@ static xegpu::TensorDescType tryOptimize(xegpu::TensorDescType tdescType, return tdescType; SmallVector supportedShape = {supportedHeight, supportedWidth}; + auto ctx = tdescType.getContext(); + auto origLayout = tdescType.getLayoutAttr(); + auto laneLayoutI64 = origLayout.getEffectiveLaneLayoutAsInt(); + SmallVector laneLayoutI32(laneLayoutI64.begin(), + laneLayoutI64.end()); + xegpu::LayoutAttr newLayout = xegpu::LayoutAttr::get( - tdescType.getContext(), tdescType.getLayoutAttr().getLaneLayout(), - DenseI32ArrayAttr::get(tdescType.getContext(), {1, 1}), - tdescType.getLayoutAttr().getOrder()); + ctx, /*lane_layout=*/DenseI32ArrayAttr::get(ctx, laneLayoutI32), + /*lane_data=*/DenseI32ArrayAttr::get(ctx, {1, 1}), + /*order=*/origLayout.getOrder()); + // Array length can not be larger than 1 for transpose case. return xegpu::TensorDescType::get(supportedShape, newElemTy, arrayLen, tdescType.getBoundaryCheck(), diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp index ecdf253d68182..d8ce24ddd5cb0 100644 --- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp +++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUSubgroupDistribute.cpp @@ -256,7 +256,7 @@ struct CreateNdDescDistribution final : public gpu::WarpDistributionPattern { auto descOp = operand->get().getDefiningOp(); unsigned operandIdx = operand->getOperandNumber(); - xegpu::LayoutAttr layout = descOp.getType().getLayoutAttr(); + xegpu::DistributeLayoutAttr layout = descOp.getType().getLayoutAttr(); if (!layout) return rewriter.notifyMatchFailure( descOp, "the tensor descriptor lacks layout attribute"); @@ -342,7 +342,7 @@ struct StoreNdDistribution final : public gpu::WarpDistributionPattern { SmallVector offsetTypes = llvm::map_to_vector( offsetsAsValues, [](Value v) { return v.getType(); }); xegpu::TensorDescType tensorDescTy = storeOp.getTensorDescType(); - xegpu::LayoutAttr layout = tensorDescTy.getLayoutAttr(); + xegpu::DistributeLayoutAttr layout = tensorDescTy.getLayoutAttr(); if (!layout) return rewriter.notifyMatchFailure( storeOp, "the source tensor descriptor lacks layout attribute"); @@ -474,7 +474,7 @@ struct LoadNdDistribution final : public gpu::WarpDistributionPattern { offsetsAsValues, [](Value v) { return v.getType(); }); xegpu::TensorDescType tensorDescTy = loadOp.getTensorDescType(); - xegpu::LayoutAttr layout = tensorDescTy.getLayoutAttr(); + xegpu::DistributeLayoutAttr layout = tensorDescTy.getLayoutAttr(); if (!layout) return rewriter.notifyMatchFailure( loadOp, "the source tensor descriptor lacks layout attribute"); @@ -709,7 +709,8 @@ struct PrefetchNdDistribution final : public gpu::WarpDistributionPattern { SmallVector offsetTypes = llvm::map_to_vector( offsetsAsValues, [](Value v) { return v.getType(); }); - xegpu::LayoutAttr layout = prefetchOp.getTensorDescType().getLayoutAttr(); + xegpu::DistributeLayoutAttr layout = + prefetchOp.getTensorDescType().getLayoutAttr(); if (!layout) return rewriter.notifyMatchFailure( prefetchOp, "the source tensor descriptor lacks layout attribute"); diff --git a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp index a5b1df0f93f57..a095c19d66c15 100644 --- a/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp +++ b/mlir/lib/Dialect/XeGPU/Transforms/XeGPUWgToSgDistribute.cpp @@ -1644,7 +1644,7 @@ void XeGPUWgToSgDistributePass::runOnOperation() { converter.addConversion( [&](xegpu::TensorDescType type, SmallVectorImpl &result) -> std::optional { - xegpu::LayoutAttr layout = type.getLayoutAttr(); + xegpu::DistributeLayoutAttr layout = type.getLayoutAttr(); // Only convert WG-level tensor descs. SG-level or layout-less types // are already legal and should pass through unchanged. if (!layout || !layout.isForWorkgroup()) diff --git a/mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp b/mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp index 243581b4ce522..f0508a30621f2 100644 --- a/mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp +++ b/mlir/lib/Dialect/XeGPU/Utils/XeGPUUtils.cpp @@ -879,7 +879,7 @@ template int xegpu::getLargestDivisor(unsigned dim, ArrayRef candidates, ArrayRef candidateMultiples); -bool xegpu::requirePacked(const xegpu::LayoutAttr layout) { +bool xegpu::requirePacked(const xegpu::DistributeLayoutAttr layout) { if (!layout) return false; auto laneData = layout.getEffectiveLaneDataAsInt(); @@ -888,7 +888,7 @@ bool xegpu::requirePacked(const xegpu::LayoutAttr layout) { return laneData[0] != 1; } -bool xegpu::requireTranspose(const xegpu::LayoutAttr layout, +bool xegpu::requireTranspose(const xegpu::DistributeLayoutAttr layout, const xegpu::uArch::uArch *uArch) { // Return false for unsupported targets. // TODO: Add more support or move to target info. diff --git a/mlir/test/Dialect/XeGPU/xegpu-blocking.mlir b/mlir/test/Dialect/XeGPU/xegpu-blocking.mlir index 9ca424374335f..61b8046bd04e5 100644 --- a/mlir/test/Dialect/XeGPU/xegpu-blocking.mlir +++ b/mlir/test/Dialect/XeGPU/xegpu-blocking.mlir @@ -257,7 +257,7 @@ gpu.module @test_kernel { // ----- #l = #xegpu.layout -#r = #xegpu.layout +#r = #xegpu.slice<#xegpu.layout, dims = [0]> gpu.module @test_kernel { gpu.func @reduce_dim_0(%a: memref<16x512xf32>, %b: memref<512xf32>) kernel attributes {VectorComputeFunctionINTEL, spirv.entry_point_abi = #spirv.entry_point_abi<>} { %acc = arith.constant {layout_result_0 = #r} dense<0.0> : vector<64xf32> @@ -277,7 +277,7 @@ gpu.module @test_kernel { // ----- #l = #xegpu.layout -#r = #xegpu.layout +#r = #xegpu.slice<#xegpu.layout, dims = [1]> gpu.module @test_kernel { gpu.func @reduce_dim_1(%a: memref<512x32xf32>, %b: memref<512xf32>) kernel attributes {VectorComputeFunctionINTEL, spirv.entry_point_abi = #spirv.entry_point_abi<>} { %c1 = arith.constant 1 : index diff --git a/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp b/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp index 0d10ab7c74da6..4760016bdcea4 100644 --- a/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp +++ b/mlir/test/lib/Dialect/XeGPU/TestXeGPUTransforms.cpp @@ -106,10 +106,9 @@ struct TestXeGPUUnrollingPatterns } if (auto layout = tdescTy.getLayoutAttr()) { - auto inst_data = layout.getInstData(); - if (inst_data && layout.isForSubgroup()) - return SmallVector(inst_data.asArrayRef().begin(), - inst_data.asArrayRef().end()); + auto inst_data = layout.getEffectiveInstDataAsInt(); + if (!inst_data.empty() && layout.isForSubgroup()) + return SmallVector(inst_data.begin(), inst_data.end()); } } @@ -138,9 +137,9 @@ struct TestXeGPUUnrollingPatterns if (chunkSize > 1) { int64_t blockedChunkSize = chunkSize; - auto instData = layout.getInstData(); + auto instData = layout.getEffectiveInstDataAsInt(); if (!instData.empty()) - blockedChunkSize = instData.asArrayRef().back(); + blockedChunkSize = instData.back(); // To create a new attribute with a different chunk_size: auto newEncoding = xegpu::ScatterTensorDescAttr::get( @@ -150,7 +149,7 @@ struct TestXeGPUUnrollingPatterns } } if (layout) { - if (layout.getLaneLayout() == nullptr) + if (layout.getEffectiveLaneLayoutAsInt().empty()) layout = xegpu::LayoutAttr(); else layout = layout.dropInstData();