[mlir][linalg][bufferize] Bufferize ops via PreOrder traversal
The existing PostOrder traversal with special rules for certain ops was complicated and had a bug. Switch to PreOrder traversal.

Differential Revision: https://reviews.llvm.org/D113338
Committed by matthias-springer on Nov 10, 2021. Parent: 7b3305f. Commit: c3eb967.
Showing 3 changed files with 116 additions and 80 deletions.
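
The switch relies on MLIR's two walk orders: the default post-order walk visits an op only after all ops nested in its regions, whereas WalkOrder::PreOrder visits an op before recursing into its regions. A minimal sketch of that API is shown below; the callbacks are placeholders and are not taken from this patch.

    #include "mlir/IR/Operation.h"
    #include "mlir/IR/Visitors.h"

    using namespace mlir;

    // Pre-order: each op is visited before the ops nested in its regions, so
    // per-parent state (e.g. block argument mappings) can be set up before
    // the children are processed.
    WalkResult walkPreOrder(Operation *root) {
      return root->walk<WalkOrder::PreOrder>([](Operation *op) -> WalkResult {
        // Process `op` here; returning WalkResult::interrupt() aborts the walk.
        return WalkResult::advance();
      });
    }

    // Post-order (the default when no WalkOrder is specified): children first.
    WalkResult walkPostOrder(Operation *root) {
      return root->walk([](Operation *op) -> WalkResult {
        return WalkResult::advance();
      });
    }
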
@@ -1735,36 +1735,16 @@ static LogicalResult bufferizeFuncOpInternals(
LLVM_DEBUG(llvm::dbgs() << "\n\n");
LDBG("Begin BufferizeFuncOpInternals:\n" << funcOp << '\n');
OpBuilder b(funcOp->getContext());
/// Start by bufferizing `funcOp` arguments.

// Start by bufferizing `funcOp` arguments.
if (failed(bufferize(b, funcOp, bvm, aliasInfo, allocationFns)))
return failure();

// Cannot erase ops during the traversal. Do that afterwards.
SmallVector<Operation *> toErase;
// Bufferize the function body. `bufferizedOps` keeps track ops that were
// already bufferized with pre-order traversal.
DenseSet<Operation *> bufferizedOps;
auto walkFunc = [&](Operation *op) -> WalkResult {
// Collect ops that need to be bufferized before `op`.
SmallVector<Operation *> preorderBufferize;
Operation *parentOp = op->getParentOp();
// scf::ForOp and TiledLoopOp must be bufferized before their blocks
// ("pre-order") because BBargs must be mapped when bufferizing children.
while (isa_and_nonnull<scf::ForOp, TiledLoopOp>(parentOp)) {
if (bufferizedOps.contains(parentOp))
break;
bufferizedOps.insert(parentOp);
preorderBufferize.push_back(parentOp);
parentOp = parentOp->getParentOp();
}

for (Operation *op : llvm::reverse(preorderBufferize))
if (failed(bufferizeOp(op, bvm, aliasInfo, allocationFns,
&bufferizedFunctionTypes)))
return failure();

if (!bufferizedOps.contains(op) &&
failed(bufferizeOp(op, bvm, aliasInfo, allocationFns,
auto walkFunc = [&](Operation *op) -> WalkResult {
if (failed(bufferizeOp(op, bvm, aliasInfo, allocationFns,
&bufferizedFunctionTypes)))
return failure();

@@ -1776,7 +1756,11 @@ static LogicalResult bufferizeFuncOpInternals(

return success();
};
if (funcOp.walk(walkFunc).wasInterrupted())

// Bufferize ops pre-order, i.e., bufferize ops first, then their children.
// This is needed for ops with blocks that have BlockArguments. These must be
// mapped before bufferizing the children.
if (funcOp.walk<WalkOrder::PreOrder>(walkFunc).wasInterrupted())
return failure();

LDBG("End BufferizeFuncOpInternals:\n" << funcOp << '\n');
@@ -2798,31 +2782,40 @@ struct IfOpInterface
BlockAndValueMapping &bvm,
BufferizationAliasInfo &aliasInfo,
AllocationCallbacks &allocationFn) const {
auto ifOp = cast<scf::IfOp>(op);

// Take a guard before anything else.
OpBuilder::InsertionGuard g(b);
// scf::IfOp is bufferized after scf::YieldOp in the else branch.
return success();
}
};

for (OpResult opResult : ifOp->getResults()) {
if (!opResult.getType().isa<TensorType>())
continue;
// TODO: Atm we bail on unranked TensorType because we don't know how to
// alloc an UnrankedMemRefType + its underlying ranked MemRefType.
assert(opResult.getType().isa<RankedTensorType>() &&
"unsupported unranked tensor");
/// Bufferize the scf::IfOp. This function is called after the YieldOp was
/// bufferized.
static LogicalResult bufferizeIfOp(scf::IfOp ifOp, OpBuilder &b,
BlockAndValueMapping &bvm,
BufferizationAliasInfo &aliasInfo,
AllocationCallbacks &allocationFn) {
// Take a guard before anything else.
OpBuilder::InsertionGuard g(b);
b.setInsertionPoint(ifOp);

Value resultBuffer =
getResultBuffer(b, opResult, bvm, aliasInfo, allocationFn);
if (!resultBuffer)
return failure();
for (OpResult opResult : ifOp->getResults()) {
if (!opResult.getType().isa<TensorType>())
continue;
// TODO: Atm we bail on unranked TensorType because we don't know how to
// alloc an UnrankedMemRefType + its underlying ranked MemRefType.
assert(opResult.getType().isa<RankedTensorType>() &&
"unsupported unranked tensor");

aliasInfo.createAliasInfoEntry(resultBuffer);
map(bvm, opResult, resultBuffer);
}
Value resultBuffer =
getResultBuffer(b, opResult, bvm, aliasInfo, allocationFn);
if (!resultBuffer)
return failure();

return success();
aliasInfo.createAliasInfoEntry(resultBuffer);
map(bvm, opResult, resultBuffer);
}
};

return success();
}

struct ForOpInterface
: public BufferizableOpInterface::ExternalModel<ForOpInterface,
@@ -2862,6 +2855,9 @@ struct ForOpInterface
BlockAndValueMapping &bvm,
BufferizationAliasInfo &aliasInfo,
AllocationCallbacks &allocationFn) const {
// Note: This method is just setting up the mappings for the block arguments
// and the result buffer. The op is bufferized after the scf::YieldOp.

auto forOp = cast<scf::ForOp>(op);

// Take a guard before anything else.
@@ -2893,6 +2889,39 @@
}
};

/// Bufferize the scf::ForOp. This function is called after the YieldOp was
/// bufferized.
static LogicalResult bufferizeForOp(scf::ForOp forOp, OpBuilder &b,
BlockAndValueMapping &bvm,
BufferizationAliasInfo &aliasInfo,
AllocationCallbacks &allocationFn) {
auto yieldOp = cast<scf::YieldOp>(&forOp.region().front().back());
for (OpOperand &operand : yieldOp->getOpOperands()) {
auto tensorType = operand.get().getType().dyn_cast<TensorType>();
if (!tensorType)
continue;

OpOperand &forOperand = forOp.getOpOperandForResult(
forOp->getResult(operand.getOperandNumber()));
auto bbArg = forOp.getRegionIterArgForOpOperand(forOperand);
Value yieldedBuffer = lookup(bvm, operand.get());
Value bbArgBuffer = lookup(bvm, bbArg);
if (!aliasInfo.areEquivalentBufferizedValues(yieldedBuffer, bbArgBuffer)) {
// TODO: this could get resolved with copies but it can also turn into
// swaps so we need to be careful about order of copies.
return yieldOp->emitError()
<< "Yield operand #" << operand.getOperandNumber()
<< " does not bufferize to an equivalent buffer to the matching"
<< " enclosing scf::for operand";
}

// Buffers are equivalent so the work is already done and we just yield
// the bbArg so that it later canonicalizes away.
operand.set(bbArg);
}
return success();
}

struct YieldOpInterface
: public BufferizableOpInterface::ExternalModel<YieldOpInterface,
scf::YieldOp> {
@@ -2918,49 +2947,26 @@ struct YieldOpInterface
AllocationCallbacks &allocationFn) const {
auto yieldOp = cast<scf::YieldOp>(op);

// Take a guard before anything else.
OpBuilder::InsertionGuard g(b);
// Cannot create IR past a yieldOp.
b.setInsertionPoint(yieldOp);

if (auto execOp = dyn_cast<scf::ExecuteRegionOp>(yieldOp->getParentOp())) {
if (execOp->getNumResults() != 0)
return execOp->emitError(
"expected result-less scf.execute_region containing op");
return success();
}

if (auto ifOp = dyn_cast<scf::IfOp>(yieldOp->getParentOp()))
return success();

scf::ForOp forOp = dyn_cast<scf::ForOp>(yieldOp->getParentOp());
if (!forOp)
return yieldOp->emitError("expected scf::ForOp parent for scf::YieldOp");
for (OpOperand &operand : yieldOp->getOpOperands()) {
auto tensorType = operand.get().getType().dyn_cast<TensorType>();
if (!tensorType)
continue;
// Bufferize scf::IfOp after bufferizing the scf::YieldOp in the else
// branch.
if (auto ifOp = dyn_cast<scf::IfOp>(yieldOp->getParentOp())) {
if (ifOp.elseYield() != yieldOp)
return success();
return bufferizeIfOp(ifOp, b, bvm, aliasInfo, allocationFn);
}

OpOperand &forOperand = forOp.getOpOperandForResult(
forOp->getResult(operand.getOperandNumber()));
auto bbArg = forOp.getRegionIterArgForOpOperand(forOperand);
Value yieldedBuffer = lookup(bvm, operand.get());
Value bbArgBuffer = lookup(bvm, bbArg);
if (!aliasInfo.areEquivalentBufferizedValues(yieldedBuffer,
bbArgBuffer)) {
// TODO: this could get resolved with copies but it can also turn into
// swaps so we need to be careful about order of copies.
return yieldOp->emitError()
<< "Yield operand #" << operand.getOperandNumber()
<< " does not bufferize to an equivalent buffer to the matching"
<< " enclosing scf::for operand";
}
// Bufferize scf::ForOp after bufferizing the scf::YieldOp.
if (auto forOp = dyn_cast<scf::ForOp>(yieldOp->getParentOp()))
return bufferizeForOp(forOp, b, bvm, aliasInfo, allocationFn);

// Buffers are equivalent so the work is already done and we just yield
// the bbArg so that it later canonicalizes away.
operand.set(bbArg);
}
return success();
return yieldOp->emitError("expected scf::ForOp parent for scf::YieldOp");
}
};
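
Condensed, the yield-driven dispatch added to YieldOpInterface::bufferize reads roughly as follows; this is a sketch assembled from the added lines above (the scf.execute_region special case is omitted), not verbatim code:

    // An scf.yield now triggers bufferization of its parent op: the scf.if
    // once the else-branch yield has been seen, the scf.for at its single
    // yield. Other parents are rejected.
    if (auto ifOp = dyn_cast<scf::IfOp>(yieldOp->getParentOp())) {
      if (ifOp.elseYield() != yieldOp)
        return success();
      return bufferizeIfOp(ifOp, b, bvm, aliasInfo, allocationFn);
    }
    if (auto forOp = dyn_cast<scf::ForOp>(yieldOp->getParentOp()))
      return bufferizeForOp(forOp, b, bvm, aliasInfo, allocationFn);
    return yieldOp->emitError("expected scf::ForOp parent for scf::YieldOp");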

@@ -160,7 +160,7 @@ func @mini_test_case1() -> tensor<10x20xf32> {
// -----

func @main() -> tensor<4xi32> {
// expected-error @+1 {{expected result-less scf.execute_region containing op}}
// expected-error @+1 {{unsupported op with tensors}}
%r = scf.execute_region -> tensor<4xi32> {
%A = arith.constant dense<[1, 2, 3, 4]> : tensor<4xi32>
scf.yield %A: tensor<4xi32>
mlir/test/Dialect/Linalg/comprehensive-module-bufferize.mlir: 30 changes (30 additions, 0 deletions)
@@ -887,3 +887,33 @@ func @scf_if_inplace(%cond: i1,
}
return %r : tensor<?xf32>
}

// -----

// CHECK-LABEL: func @scf_if_inside_scf_for
// CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[c1:.*]] = arith.constant 1 : index
// CHECK-DAG: %[[c10:.*]] = arith.constant 10 : index
// CHECK: scf.for %{{.*}} = %[[c0]] to %[[c10]] step %[[c1]] {
// CHECK: scf.if %{{.*}} {
// CHECK: } else {
// CHECK: vector.transfer_write
// CHECK: }
// CHECK: }
func @scf_if_inside_scf_for(%t1: tensor<?xf32> {linalg.inplaceable = true},
%v: vector<5xf32>, %idx: index,
%cond: i1) -> tensor<?xf32> {
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%c10 = arith.constant 10 : index
%r = scf.for %iv = %c0 to %c10 step %c1 iter_args(%bb = %t1) -> (tensor<?xf32>) {
%r2 = scf.if %cond -> (tensor<?xf32>) {
scf.yield %bb : tensor<?xf32>
} else {
%t2 = vector.transfer_write %v, %bb[%idx] : vector<5xf32>, tensor<?xf32>
scf.yield %t2 : tensor<?xf32>
}
scf.yield %r2 : tensor<?xf32>
}
return %r : tensor<?xf32>
}
