[mlir][linalg] Add unit dim folding pattern for tensor.pad (#84684)
Unit-extent dims that are not padded by a tensor.pad can be folded away.
Folding them alongside the unit-extent dims of surrounding linalg ops
increases the chance that the iteration space of a linalg op aligns with
nearby pad ops, improving fusion opportunities.
qedawkins committed Mar 11, 2024
1 parent c93c76b commit 60e562d
Showing 3 changed files with 213 additions and 0 deletions.
4 changes: 4 additions & 0 deletions mlir/include/mlir/Dialect/Linalg/Transforms/Transforms.h
@@ -481,6 +481,10 @@ struct ControlDropUnitDims {
if (auto genericOp = dyn_cast_or_null<GenericOp>(op)) {
return llvm::to_vector(llvm::seq<unsigned>(0, genericOp.getNumLoops()));
}
if (auto padOp = dyn_cast_or_null<tensor::PadOp>(op)) {
return llvm::to_vector(
llvm::seq<unsigned>(0, padOp.getSourceType().getRank()));
}
return SmallVector<unsigned>{};
};
};
122 changes: 122 additions & 0 deletions mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp
@@ -561,6 +561,126 @@ struct DropUnitDims : public OpRewritePattern<GenericOp> {
};
} // namespace

//===---------------------------------------------------------------------===//
// Drop dimensions that are unit-extents within tensor operations.
//===---------------------------------------------------------------------===//

namespace {
struct DropPadUnitDims : public OpRewritePattern<tensor::PadOp> {
DropPadUnitDims(MLIRContext *context, ControlDropUnitDims options = {},
PatternBenefit benefit = 1)
: OpRewritePattern(context, benefit), options(std::move(options)) {}

LogicalResult matchAndRewrite(tensor::PadOp padOp,
PatternRewriter &rewriter) const override {
// 1a. Get the allowed list of dimensions to drop from the `options`.
SmallVector<unsigned> allowedUnitDims = options.controlFn(padOp);
if (allowedUnitDims.empty()) {
return rewriter.notifyMatchFailure(
padOp, "control function returns no allowed unit dims to prune");
}

if (padOp.getSourceType().getEncoding()) {
return rewriter.notifyMatchFailure(
padOp, "cannot collapse dims of tensor with encoding");
}

// Fail for non-constant padding values. The body of the pad could
// depend on the padding indices and/or properties of the padded
// tensor so for now we fail.
// TODO: Support non-constant padding values.
Value paddingVal = padOp.getConstantPaddingValue();
if (!paddingVal) {
return rewriter.notifyMatchFailure(
padOp, "unimplemented: non-constant padding value");
}

ArrayRef<int64_t> sourceShape = padOp.getSourceType().getShape();
int64_t padRank = sourceShape.size();

auto isStaticZero = [](OpFoldResult f) {
std::optional<int64_t> maybeInt = getConstantIntValue(f);
return maybeInt && *maybeInt == 0;
};

llvm::SmallDenseSet<unsigned> unitDimsFilter(allowedUnitDims.begin(),
allowedUnitDims.end());
llvm::SmallDenseSet<unsigned> unitDims;
SmallVector<int64_t> newShape;
SmallVector<OpFoldResult> newLowPad;
SmallVector<OpFoldResult> newHighPad;
for (const auto [dim, size, low, high] :
zip_equal(llvm::seq(static_cast<int64_t>(0), padRank), sourceShape,
padOp.getMixedLowPad(), padOp.getMixedHighPad())) {
if (unitDimsFilter.contains(dim) && size == 1 && isStaticZero(low) &&
isStaticZero(high)) {
unitDims.insert(dim);
} else {
newShape.push_back(size);
newLowPad.push_back(low);
newHighPad.push_back(high);
}
}

if (unitDims.empty()) {
return rewriter.notifyMatchFailure(padOp, "no unit dims to collapse");
}

ReassociationIndices reassociationGroup;
SmallVector<ReassociationIndices> reassociationMap;
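// Build the reassociation that folds each dropped unit dim into an adjacent
// kept dim. For example, the 5-D test below (tensor<1x1x3x1x1xf32> with
// droppable unit dims {0, 3}) produces the grouping [[0, 1], [2, 3], [4]].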
int64_t dim = 0;
while (dim < padRank && unitDims.contains(dim))
reassociationGroup.push_back(dim++);
while (dim < padRank) {
assert(!unitDims.contains(dim) && "expected non unit-extent");
reassociationGroup.push_back(dim);
dim++;
// Fold all following dimensions that are unit-extent.
while (dim < padRank && unitDims.contains(dim))
reassociationGroup.push_back(dim++);
reassociationMap.push_back(reassociationGroup);
reassociationGroup.clear();
}

Value collapsedSource =
collapseValue(rewriter, padOp.getLoc(), padOp.getSource(), newShape,
reassociationMap, options.rankReductionStrategy);

auto newPadOp = rewriter.create<tensor::PadOp>(
padOp.getLoc(), /*result=*/Type(), collapsedSource, newLowPad,
newHighPad, paddingVal, padOp.getNofold());

Value dest = padOp.getResult();
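// For the extract/insert_slice strategy, the expanded result is written into
// a fresh destination tensor of the original rank: unit extents at the
// dropped dims, and the collapsed pad's result sizes everywhere else.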
if (options.rankReductionStrategy ==
ControlDropUnitDims::RankReductionStrategy::ExtractInsertSlice) {
SmallVector<OpFoldResult> expandedSizes;
int64_t numUnitDims = 0;
for (auto dim : llvm::seq(static_cast<int64_t>(0), padRank)) {
if (unitDims.contains(dim)) {
expandedSizes.push_back(rewriter.getIndexAttr(1));
numUnitDims++;
continue;
}
expandedSizes.push_back(tensor::getMixedSize(
rewriter, padOp.getLoc(), newPadOp, dim - numUnitDims));
}
dest = rewriter.create<tensor::EmptyOp>(
padOp.getLoc(), expandedSizes,
padOp.getResultType().getElementType());
}

Value expandedValue =
expandValue(rewriter, padOp.getLoc(), newPadOp.getResult(), dest,
reassociationMap, options.rankReductionStrategy);
rewriter.replaceOp(padOp, expandedValue);
return success();
}

private:
ControlDropUnitDims options;
};
} // namespace

namespace {
/// Convert `extract_slice` operations to rank-reduced versions.
struct RankReducedExtractSliceOp
@@ -640,6 +760,7 @@ populateFoldUnitExtentDimsViaReshapesPatterns(RewritePatternSet &patterns,
ControlDropUnitDims &options) {
auto *context = patterns.getContext();
patterns.add<DropUnitDims>(context, options);
patterns.add<DropPadUnitDims>(context, options);
// TODO: Patterns unrelated to unit dim folding should be factored out.
patterns.add<RankReducedExtractSliceOp,
RankReducedInsertSliceOp<tensor::InsertSliceOp>,
@@ -661,6 +782,7 @@ populateFoldUnitExtentDimsViaSlicesPatterns(RewritePatternSet &patterns,
options.rankReductionStrategy =
ControlDropUnitDims::RankReductionStrategy::ExtractInsertSlice;
patterns.add<DropUnitDims>(context, options);
patterns.add<DropPadUnitDims>(context, options);
// TODO: Patterns unrelated to unit dim folding should be factored out.
linalg::FillOp::getCanonicalizationPatterns(patterns, context);
tensor::EmptyOp::getCanonicalizationPatterns(patterns, context);
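For reference, a minimal sketch of how these patterns are typically driven (not part of this commit). It assumes the public linalg::populateFoldUnitExtentDimsPatterns entry point forwards a ControlDropUnitDims to the patterns populated above, and the control function shown is a hypothetical restriction to tensor.pad ops only:

// Sketch only; verify the populate entry point against the in-tree
// mlir/Dialect/Linalg/Transforms/Transforms.h. The driver comes from
// mlir/Transforms/GreedyPatternRewriteDriver.h.
ControlDropUnitDims options;
// Hypothetical control function: only drop unit dims of tensor.pad ops.
options.controlFn = [](Operation *op) -> SmallVector<unsigned> {
  if (auto padOp = dyn_cast_or_null<tensor::PadOp>(op))
    return llvm::to_vector(
        llvm::seq<unsigned>(0, padOp.getSourceType().getRank()));
  return {};
};
RewritePatternSet patterns(funcOp.getContext());
linalg::populateFoldUnitExtentDimsPatterns(patterns, options);
// `funcOp` is a placeholder for whatever op the patterns are applied to.
(void)applyPatternsAndFoldGreedily(funcOp, std::move(patterns));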
87 changes: 87 additions & 0 deletions mlir/test/Dialect/Linalg/drop-unit-extent-dims.mlir
@@ -946,3 +946,90 @@ func.func @drop_all_loops(%arg0 : memref<1x1xf32, 3>) -> memref<1x1xf32, 3>
// CHECK-SLICES-LABEL: func @drop_all_loops
// CHECK-SLICES: memref.subview %{{.*}}[0, 0] [1, 1] [1, 1] : memref<1x1xf32, 3> to memref<f32, strided<[]>, 3>
// CHECK-SLICES: linalg.generic{{.*}}memref<f32, strided<[]>, 3>

// -----

func.func @drop_unit_pad_dims(%arg0: tensor<1x1x3x1x1xf32>) -> tensor<1x2x3x1x3xf32>
{
%c0 = arith.constant 0 : index
%cst0 = arith.constant 0.0 : f32
%0 = tensor.pad %arg0 low[0, 1, 0, %c0, 0] high[0, 0, 0, %c0, 2] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index, %arg5: index):
tensor.yield %cst0 : f32
} : tensor<1x1x3x1x1xf32> to tensor<1x2x3x1x3xf32>
return %0 : tensor<1x2x3x1x3xf32>
}

// CHECK-LABEL: func @drop_unit_pad_dims
// CHECK: %[[COLLAPSE:.+]] = tensor.collapse_shape
// CHECK-SAME: {{\[}}[0, 1], [2, 3], [4]{{\]}} : tensor<1x1x3x1x1xf32> into tensor<1x3x1xf32>
// CHECK: %[[PADDED:.+]] = tensor.pad %[[COLLAPSE]] low[1, 0, 0] high[0, 0, 2]
// CHECK: } : tensor<1x3x1xf32> to tensor<2x3x3xf32>
// CHECK: tensor.expand_shape %[[PADDED]]
// CHECK-SAME: {{\[}}[0, 1], [2, 3], [4]{{\]}} : tensor<2x3x3xf32> into tensor<1x2x3x1x3xf32>

// CHECK-SLICES-LABEL: func @drop_unit_pad_dims
// CHECK-SLICES: %[[EXTRACT:.+]] = tensor.extract_slice
// CHECK-SLICES-SAME: [0, 0, 0, 0, 0] [1, 1, 3, 1, 1] [1, 1, 1, 1, 1] : tensor<1x1x3x1x1xf32> to tensor<1x3x1xf32>
// CHECK-SLICES: %[[PADDED:.+]] = tensor.pad %[[EXTRACT]] low[1, 0, 0] high[0, 0, 2]
// CHECK-SLICES: } : tensor<1x3x1xf32> to tensor<2x3x3xf32>
// CHECK-SLICES: tensor.insert_slice %[[PADDED]]
// CHECK-SLICES-SAME: [0, 0, 0, 0, 0] [1, 2, 3, 1, 3] [1, 1, 1, 1, 1] : tensor<2x3x3xf32> into tensor<1x2x3x1x3xf32>

// -----

func.func @drop_unit_pad_dynamic_dims(%arg0: tensor<1x?xf32>) -> tensor<1x?xf32>
{
%c0 = arith.constant 0 : index
%cst0 = arith.constant 0.0 : f32
%0 = tensor.pad %arg0 low[0, 5] high[0, 6] {
^bb0(%arg1: index, %arg2: index):
tensor.yield %cst0 : f32
} : tensor<1x?xf32> to tensor<1x?xf32>
return %0 : tensor<1x?xf32>
}

// CHECK-LABEL: func @drop_unit_pad_dynamic_dims
// CHECK: %[[COLLAPSE:.+]] = tensor.collapse_shape
// CHECK-SAME: {{\[}}[0, 1]{{\]}} : tensor<1x?xf32> into tensor<?xf32>
// CHECK: %[[PADDED:.+]] = tensor.pad %[[COLLAPSE]] low[5] high[6]
// CHECK: } : tensor<?xf32> to tensor<?xf32>
// CHECK: tensor.expand_shape %[[PADDED]]
// CHECK-SAME: {{\[}}[0, 1]{{\]}} : tensor<?xf32> into tensor<1x?xf32>

// CHECK-SLICES: #[[$MAP:.+]] = affine_map<()[s0] -> (s0 + 11)>

// CHECK-SLICES-LABEL: func @drop_unit_pad_dynamic_dims
// CHECK-SLICES-SAME: %[[ARG0:[A-Za-z0-9]+]]: tensor<1x?xf32>
// CHECK-SLICES: %[[DIM:.+]] = tensor.dim %[[ARG0]], %c1
// CHECK-SLICES: %[[EXTRACT:.+]] = tensor.extract_slice
// CHECK-SLICES-SAME: [0, 0] [1, %[[DIM]]] [1, 1] : tensor<1x?xf32> to tensor<?xf32>
// CHECK-SLICES: %[[PADDED:.+]] = tensor.pad %[[EXTRACT]] low[5] high[6]
// CHECK-SLICES: } : tensor<?xf32> to tensor<?xf32>
// CHECK-SLICES: %[[PADDED_DIM:.+]] = affine.apply #[[$MAP]]()[%[[DIM]]]
// CHECK-SLICES: %[[EMPTY:.+]] = tensor.empty(%[[PADDED_DIM]]) : tensor<1x?xf32>
// CHECK-SLICES: tensor.insert_slice %[[PADDED]] into %[[EMPTY]]
// CHECK-SLICES-SAME: [0, 0] [1, %[[PADDED_DIM]]] [1, 1] : tensor<?xf32> into tensor<1x?xf32>

// -----

func.func @do_not_drop_non_constant_padding(%arg0: tensor<1x1x3x1x1xf32>, %pad: f32) -> tensor<1x2x3x1x3xf32>
{
%c0 = arith.constant 0 : index
%0 = tensor.pad %arg0 low[0, 1, 0, %c0, 0] high[0, 0, 0, %c0, 2] {
^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index, %arg5: index):
%0 = arith.index_cast %arg3 : index to i64
%1 = arith.sitofp %0 : i64 to f32
%add = arith.addf %pad, %1 : f32
tensor.yield %add : f32
} : tensor<1x1x3x1x1xf32> to tensor<1x2x3x1x3xf32>
return %0 : tensor<1x2x3x1x3xf32>
}

// CHECK-LABEL: func @do_not_drop_non_constant_padding
// CHECK: tensor.pad %{{.*}} low[0, 1, 0, %c0, 0] high[0, 0, 0, %c0, 2]
// CHECK: } : tensor<1x1x3x1x1xf32> to tensor<1x2x3x1x3xf32>

// CHECK-SLICES-LABEL: func @do_not_drop_non_constant_padding
// CHECK-SLICES: tensor.pad %{{.*}} low[0, 1, 0, %c0, 0] high[0, 0, 0, %c0, 2]
// CHECK-SLICES: } : tensor<1x1x3x1x1xf32> to tensor<1x2x3x1x3xf32>
