Skip to content

Commit

Permalink
[mlir][Linalg] Add a useLinalgCopy option to Linalg bufferization.
Browse files Browse the repository at this point in the history
Benchmarks show that memref::CopyOp is currently up to 200x slower than
tiled and vectorized versions of linalg::Copy.
Add a temporary flag to allow comprehensive bufferize to generate a
linalg::GenericOp that implements a copy until this performance bug is
resolved.

Differential Revision: https://reviews.llvm.org/D117696
  • Loading branch information
Nicolas Vasilache committed Jan 19, 2022
1 parent baa9b7c commit d492a7b
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 2 deletions.
2 changes: 2 additions & 0 deletions mlir/include/mlir/Dialect/Linalg/Passes.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,8 @@ std::unique_ptr<OperationPass<FuncOp>> createConvertLinalgToAffineLoopsPass();
/// on SSA use-def chains starting from function operands that are annotated
/// with the 'inplaceable' attribute.
std::unique_ptr<Pass> createLinalgComprehensiveModuleBufferizePass();
/// Overload taking the value for the pass's `use-linalg-copy` option, which
/// selects a copy operation implemented as a Linalg op.
std::unique_ptr<Pass>
createLinalgComprehensiveModuleBufferizePass(bool useLinalgCopy);

/// Create a pass to convert Linalg operations which work on tensors to use
/// buffers instead.
Expand Down
3 changes: 3 additions & 0 deletions mlir/include/mlir/Dialect/Linalg/Passes.td
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ def LinalgComprehensiveModuleBufferize :
Option<"useAlloca", "use-alloca", "bool",
/*default=*/"false",
"Use stack allocations for memrefs (for testing purposes only)">,
Option<"useLinalgCopy", "use-linalg-copy", "bool",
/*default=*/"false",
"Use a copy operation implemented as a Linalg op.">,
Option<"analysisFuzzerSeed", "analysis-fuzzer-seed", "unsigned",
/*default=*/"0",
"Analyze ops in random order with a given seed (fuzzer)">,
Expand Down
43 changes: 41 additions & 2 deletions mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@ struct LinalgComprehensiveModuleBufferize
/// Copy constructor; uses the compiler-generated member-wise copy.
LinalgComprehensiveModuleBufferize(
const LinalgComprehensiveModuleBufferize &p) = default;

LinalgComprehensiveModuleBufferize(bool linalgCopy) {
this->useLinalgCopy = linalgCopy;
}

void runOnOperation() override;

void getDependentDialects(DialectRegistry &registry) const override {
Expand Down Expand Up @@ -74,6 +78,32 @@ static FailureOr<Value> allocationFnUsingAlloca(OpBuilder &b, Location loc,
return allocated;
}

/// Create a linalg::GenericOp version of an n-D copy from `from` to `to` that
/// can further tile, lower to loops or vectorize, unlike the current
/// implementation of memref::CopyOp.
/// Do not depend on linalg::CopyOp that is getting deprecated.
///
/// Returns failure() without creating any op when either value is not of
/// MemRefType or when the two memref ranks differ; returns success() after
/// the op has been created otherwise.
static LogicalResult createLinalgCopyOp(OpBuilder &b, Location loc, Value from,
                                        Value to) {
  // Use dyn_cast rather than cast: cast<> asserts on a type mismatch, which
  // would make the null checks below unreachable and turn a recoverable
  // failure into a crash.
  auto memrefTypeFrom = from.getType().dyn_cast<MemRefType>();
  auto memrefTypeTo = to.getType().dyn_cast<MemRefType>();
  if (!memrefTypeFrom || !memrefTypeTo ||
      memrefTypeFrom.getRank() != memrefTypeTo.getRank())
    return failure();
  // Source and destination are both indexed through the same n-D identity map.
  AffineMap id =
      AffineMap::getMultiDimIdentityMap(memrefTypeTo.getRank(), b.getContext());
  // One parallel iterator per memref dimension.
  SmallVector<StringRef> iteratorTypes(memrefTypeTo.getRank(),
                                       getParallelIteratorTypeName());
  b.create<linalg::GenericOp>(loc,
                              /*inputs=*/from,
                              /*outputs=*/to,
                              /*indexingMaps=*/llvm::makeArrayRef({id, id}),
                              /*iteratorTypes=*/iteratorTypes,
                              [](OpBuilder &b, Location loc, ValueRange args) {
                                // Yield the first region argument unchanged.
                                b.create<linalg::YieldOp>(loc, args.front());
                              });
  return success();
}

void LinalgComprehensiveModuleBufferize::runOnOperation() {
auto options = std::make_unique<AnalysisBufferizationOptions>();
if (useAlloca) {
Expand All @@ -82,13 +112,17 @@ void LinalgComprehensiveModuleBufferize::runOnOperation() {
return success();
};
}
// TODO: atm memref::CopyOp can be 200x slower than linalg::GenericOp.
// Once this perf bug is fixed more systematically, we can revisit.
if (useLinalgCopy)
options->memCpyFn = createLinalgCopyOp;

options->allowReturnMemref = allowReturnMemref;
options->allowUnknownOps = allowUnknownOps;
options->analysisFuzzerSeed = analysisFuzzerSeed;
options->testAnalysisOnly = testAnalysisOnly;
options->printConflicts = printConflicts;
options->createDeallocs = createDeallocs;
options->printConflicts = printConflicts;
options->testAnalysisOnly = testAnalysisOnly;

// Enable InitTensorOp elimination.
if (initTensorElimination) {
Expand Down Expand Up @@ -120,3 +154,8 @@ void LinalgComprehensiveModuleBufferize::runOnOperation() {
/// Create a default-constructed LinalgComprehensiveModuleBufferize pass.
std::unique_ptr<Pass> mlir::createLinalgComprehensiveModuleBufferizePass() {
return std::make_unique<LinalgComprehensiveModuleBufferize>();
}

/// Create a LinalgComprehensiveModuleBufferize pass, forwarding
/// `useLinalgCopy` to the pass constructor.
std::unique_ptr<Pass>
mlir::createLinalgComprehensiveModuleBufferizePass(bool useLinalgCopy) {
return std::make_unique<LinalgComprehensiveModuleBufferize>(useLinalgCopy);
}

0 comments on commit d492a7b

Please sign in to comment.