-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[MLIR][Affine] Check dependences during MDG init #156422
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[MLIR][Affine] Check dependences during MDG init #156422
Conversation
4ea235f
to
11058ac
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Posted some comments. Thanks.
11058ac
to
025a83f
Compare
@llvm/pr-subscribers-mlir @llvm/pr-subscribers-mlir-affine Author: Uday Bondhugula (bondhugula) Changes: Check affine dependences precisely during MDG init before adding edges. Leads to more/better fusion as a result. Fixes: #156421 Full diff: https://github.com/llvm/llvm-project/pull/156422.diff 3 Files Affected:
diff --git a/mlir/include/mlir/Dialect/Affine/Analysis/Utils.h b/mlir/include/mlir/Dialect/Affine/Analysis/Utils.h
index 0dd8de4f70039..916cebbe2bf8e 100644
--- a/mlir/include/mlir/Dialect/Affine/Analysis/Utils.h
+++ b/mlir/include/mlir/Dialect/Affine/Analysis/Utils.h
@@ -153,9 +153,17 @@ struct MemRefDependenceGraph {
MemRefDependenceGraph(Block &block) : block(block) {}
- // Initializes the dependence graph based on operations in `block'.
- // Returns true on success, false otherwise.
- bool init();
+ // Initializes the data dependence graph by iteration over operations in the
+ // MDG's `block`. A `Node` is created for every top-level op except for
+ // side-effect-free operations with zero results and no regions. Assigns each
+ // node in the graph a node id based on the order in block. Fails if certain
+ // kinds of operations, for which `Node` creation isn't supported, are
+ // encountered (unknown region holding ops). If `fullAffineDependences` is
+ // set, affine memory dependence analysis is performed before concluding that
+ // conflicting affine memory accesses lead to a dependence check; otherwise, a
+ // pair of conflicting affine memory accesses (where one of them is a store
+ // and they are to the same memref) always leads to an edge (conservatively).
+ bool init(bool fullAffineDependences = true);
// Returns the graph node for 'id'.
const Node *getNode(unsigned id) const;
diff --git a/mlir/lib/Dialect/Affine/Analysis/Utils.cpp b/mlir/lib/Dialect/Affine/Analysis/Utils.cpp
index 99ea20bf13b49..ea693034c2063 100644
--- a/mlir/lib/Dialect/Affine/Analysis/Utils.cpp
+++ b/mlir/lib/Dialect/Affine/Analysis/Utils.cpp
@@ -18,6 +18,7 @@
#include "mlir/Dialect/Affine/Analysis/LoopAnalysis.h"
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Affine/IR/AffineValueMap.h"
+#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/Utils/StaticValueUtils.h"
#include "mlir/IR/IntegerSet.h"
#include "llvm/ADT/SetVector.h"
@@ -241,7 +242,96 @@ addNodeToMDG(Operation *nodeOp, MemRefDependenceGraph &mdg,
return &node;
}
-bool MemRefDependenceGraph::init() {
+/// Returns the memref being read/written by a memref/affine load/store op.
+static Value getMemRef(Operation *memOp) {
+ if (auto memrefLoad = dyn_cast<memref::LoadOp>(memOp))
+ return memrefLoad.getMemRef();
+ if (auto affineLoad = dyn_cast<AffineReadOpInterface>(memOp))
+ return affineLoad.getMemRef();
+ if (auto memrefStore = dyn_cast<memref::StoreOp>(memOp))
+ return memrefStore.getMemRef();
+ if (auto affineStore = dyn_cast<AffineWriteOpInterface>(memOp))
+ return affineStore.getMemRef();
+ llvm_unreachable("unexpected op");
+}
+
+/// Returns true if there may be a dependence on `memref` from srcNode's
+/// memory ops to dstNode's memory ops, while using the affine memory
+/// dependence analysis checks. The method assumes that there is at least one
+/// memory op in srcNode's loads and stores on `memref`, and similarly for
+/// `dstNode`. `srcNode.op` and `destNode.op` are expected to be nested in the
+/// same block and so the dependences are tested at the depth of that block.
+static bool mayDependence(const Node &srcNode, const Node &dstNode,
+ Value memref) {
+ assert(srcNode.op->getBlock() == dstNode.op->getBlock());
+ if (!isa<AffineForOp>(srcNode.op) || !isa<AffineForOp>(dstNode.op))
+ return true;
+
+ // Conservatively handle dependences involving non-affine load/stores. Return
+ // true if there exists a conflicting read/write access involving such.
+ auto hasNonAffineDep = [&](ArrayRef<Operation *> srcOps,
+ ArrayRef<Operation *> dstOps) {
+ return llvm::any_of(srcOps, [&](Operation *srcOp) {
+ Value srcMemref = getMemRef(srcOp);
+ if (srcMemref != memref)
+ return false;
+ return llvm::find_if(dstOps, [&](Operation *dstOp) {
+ return srcMemref == getMemRef(dstOp);
+ }) != dstOps.end();
+ });
+ };
+
+ SmallVector<Operation *> dstOps;
+ // Between non-affine src stores and dst load/store.
+ llvm::append_range(dstOps, llvm::concat<Operation *const>(
+ dstNode.loads, dstNode.stores,
+ dstNode.memrefLoads, dstNode.memrefStores));
+ if (hasNonAffineDep(srcNode.memrefStores, dstOps))
+ return true;
+ // Between non-affine loads and dst stores.
+ dstOps.clear();
+ llvm::append_range(dstOps, llvm::concat<Operation *const>(
+ dstNode.stores, dstNode.memrefStores));
+ if (hasNonAffineDep(srcNode.memrefLoads, dstOps))
+ return true;
+ // Between affine stores and memref load/stores.
+ dstOps.clear();
+ llvm::append_range(dstOps, llvm::concat<Operation *const>(
+ dstNode.memrefLoads, dstNode.memrefStores));
+ if (hasNonAffineDep(srcNode.stores, dstOps))
+ return true;
+ // Between affine loads and memref stores.
+ dstOps.clear();
+ llvm::append_range(dstOps, llvm::concat<Operation *const>(
+ dstNode.stores, dstNode.memrefStores));
+ if (hasNonAffineDep(srcNode.loads, dstOps))
+ return true;
+
+ // Affine load/store pairs. We don't need to check for locally allocated
+ // memrefs since the dependence analysis here is between mem ops from
+ // srcNode's for op to dstNode's for op at the depth at which those
+ // `affine.for` ops are nested, i.e., dependences at depth `d + 1` where
+ // `d` is the number of common surrounding loops.
+ for (auto *srcMemOp :
+ llvm::concat<Operation *const>(srcNode.stores, srcNode.loads)) {
+ MemRefAccess srcAcc(srcMemOp);
+ if (srcAcc.memref != memref)
+ continue;
+ for (auto *destMemOp :
+ llvm::concat<Operation *const>(dstNode.stores, dstNode.loads)) {
+ MemRefAccess destAcc(destMemOp);
+ if (destAcc.memref != memref)
+ continue;
+ // Check for a top-level dependence between srcNode and destNode's ops.
+ if (!noDependence(checkMemrefAccessDependence(
+ srcAcc, destAcc, getNestingDepth(srcNode.op) + 1)))
+ return true;
+ }
+ }
+ return false;
+}
+
+bool MemRefDependenceGraph::init(bool fullAffineDependences) {
LDBG() << "--- Initializing MDG ---";
// Map from a memref to the set of ids of the nodes that have ops accessing
// the memref.
@@ -344,8 +434,12 @@ bool MemRefDependenceGraph::init() {
Node *dstNode = getNode(dstId);
bool dstHasStoreOrFree =
dstNode->hasStore(srcMemRef) || dstNode->hasFree(srcMemRef);
- if (srcHasStoreOrFree || dstHasStoreOrFree)
- addEdge(srcId, dstId, srcMemRef);
+ if ((srcHasStoreOrFree || dstHasStoreOrFree)) {
+ // Check precise affine deps if asked for; otherwise, conservative.
+ if (!fullAffineDependences ||
+ mayDependence(*srcNode, *dstNode, srcMemRef))
+ addEdge(srcId, dstId, srcMemRef);
+ }
}
}
}
@@ -562,13 +656,13 @@ MemRefDependenceGraph::getFusedLoopNestInsertionPoint(unsigned srcId,
}
// Build set of insts in range (srcId, dstId) which depend on 'srcId'.
- SmallPtrSet<Operation *, 2> srcDepInsts;
+ llvm::SmallPtrSet<Operation *, 2> srcDepInsts;
for (auto &outEdge : outEdges.lookup(srcId))
if (outEdge.id != dstId)
srcDepInsts.insert(getNode(outEdge.id)->op);
// Build set of insts in range (srcId, dstId) on which 'dstId' depends.
- SmallPtrSet<Operation *, 2> dstDepInsts;
+ llvm::SmallPtrSet<Operation *, 2> dstDepInsts;
for (auto &inEdge : inEdges.lookup(dstId))
if (inEdge.id != srcId)
dstDepInsts.insert(getNode(inEdge.id)->op);
diff --git a/mlir/test/Dialect/Affine/loop-fusion-sibling.mlir b/mlir/test/Dialect/Affine/loop-fusion-sibling.mlir
new file mode 100644
index 0000000000000..937c855b86b50
--- /dev/null
+++ b/mlir/test/Dialect/Affine/loop-fusion-sibling.mlir
@@ -0,0 +1,23 @@
+// RUN: mlir-opt %s -pass-pipeline='builtin.module(func.func(affine-loop-fusion{maximal mode=sibling}))' | FileCheck %s
+
+// Test cases specifically for sibling fusion. Note that sibling fusion test
+// cases also exist in loop-fusion*.mlir.
+
+// CHECK-LABEL: func @disjoint_stores
+func.func @disjoint_stores(%0: memref<8xf32>) {
+ %alloc_1 = memref.alloc() : memref<16xf32>
+ // The affine stores below are to different parts of the memrefs. Sibling
+ // fusion helps improve reuse and is valid.
+ affine.for %arg2 = 0 to 8 {
+ %2 = affine.load %0[%arg2] : memref<8xf32>
+ affine.store %2, %alloc_1[%arg2] : memref<16xf32>
+ }
+ affine.for %arg2 = 0 to 8 {
+ %2 = affine.load %0[%arg2] : memref<8xf32>
+ %3 = arith.negf %2 : f32
+ affine.store %3, %alloc_1[%arg2 + 8] : memref<16xf32>
+ }
+ // CHECK: affine.for
+ // CHECK-NOT: affine.for
+ return
+}
|
025a83f
to
abde432
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Some more comments :)
abde432
to
18b9390
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM. Thanks for this improvement.
Check affine dependences precisely during MDG init before adding edges. We were conservatively only checking for memref-level conflicts. Leads to more/better fusion as a result. Fixes: llvm#156421
18b9390
to
688c1cb
Compare
Check affine dependences precisely during MDG init before adding edges.
We were conservatively only checking for memref-level conflicts.
Leads to more/better fusion as a result.
Fixes: #156421