13
13
14
14
#include " mlir/Dialect/GPU/MemoryPromotion.h"
15
15
#include " mlir/Dialect/GPU/GPUDialect.h"
16
- #include " mlir/Dialect/MemRef/EDSC/Intrinsics.h"
17
16
#include " mlir/Dialect/SCF/SCF.h"
18
17
#include " mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
18
+ #include " mlir/IR/ImplicitLocOpBuilder.h"
19
19
#include " mlir/Pass/Pass.h"
20
20
#include " mlir/Transforms/LoopUtils.h"
21
21
@@ -41,45 +41,46 @@ static StringRef getDimName(unsigned dim) {
41
41
/// GPUDialect::getNumWorkgroupDimensions() loops, completing the nest with
42
42
/// single-iteration loops. Maps the innermost loops to thread dimensions, in
43
43
/// reverse order to enable access coalescing in the innermost loop.
44
- static void insertCopyLoops (OpBuilder &b, Location loc,
45
- MemRefBoundsCapture &bounds, Value from, Value to) {
46
- // Create EDSC handles for bounds.
47
- unsigned rank = bounds. rank ();
44
+ static void insertCopyLoops (ImplicitLocOpBuilder &b, Value from, Value to) {
45
+ auto memRefType = from. getType (). cast <MemRefType>();
46
+ auto rank = memRefType. getRank ();
47
+
48
48
SmallVector<Value, 4 > lbs, ubs, steps;
49
+ Value zero = b.create <ConstantIndexOp>(0 );
50
+ Value one = b.create <ConstantIndexOp>(1 );
49
51
50
52
// Make sure we have enough loops to use all thread dimensions, these trivial
51
53
// loops should be outermost and therefore inserted first.
52
54
if (rank < GPUDialect::getNumWorkgroupDimensions ()) {
53
55
unsigned extraLoops = GPUDialect::getNumWorkgroupDimensions () - rank;
54
- Value zero = std_constant_index (0 );
55
- Value one = std_constant_index (1 );
56
56
lbs.resize (extraLoops, zero);
57
57
ubs.resize (extraLoops, one);
58
58
steps.resize (extraLoops, one);
59
59
}
60
60
61
61
// Add existing bounds.
62
- lbs.append (bounds.getLbs ().begin (), bounds.getLbs ().end ());
63
- ubs.append (bounds.getUbs ().begin (), bounds.getUbs ().end ());
64
-
65
- // Emit constant operations for steps.
62
+ lbs.append (rank, zero);
63
+ ubs.reserve (lbs.size ());
66
64
steps.reserve (lbs.size ());
67
- llvm::transform (bounds.getSteps (), std::back_inserter (steps),
68
- [](int64_t step) { return std_constant_index (step); });
65
+ for (auto idx = 0 ; idx < rank; ++idx) {
66
+ ubs.push_back (
67
+ b.createOrFold <memref::DimOp>(from, b.create <ConstantIndexOp>(idx)));
68
+ steps.push_back (one);
69
+ }
69
70
70
71
// Obtain thread identifiers and block sizes, necessary to map to them.
71
72
auto indexType = b.getIndexType ();
72
73
SmallVector<Value, 3 > threadIds, blockDims;
73
74
for (unsigned i = 0 ; i < 3 ; ++i) {
74
75
auto dimName = b.getStringAttr (getDimName (i));
75
- threadIds.push_back (b.create <gpu::ThreadIdOp>(loc, indexType, dimName));
76
- blockDims.push_back (b.create <gpu::BlockDimOp>(loc, indexType, dimName));
76
+ threadIds.push_back (b.create <gpu::ThreadIdOp>(indexType, dimName));
77
+ blockDims.push_back (b.create <gpu::BlockDimOp>(indexType, dimName));
77
78
}
78
79
79
80
// Produce the loop nest with copies.
80
81
SmallVector<Value, 8 > ivs (lbs.size ());
81
82
mlir::scf::buildLoopNest (
82
- b, loc , lbs, ubs, steps,
83
+ b, b. getLoc () , lbs, ubs, steps,
83
84
[&](OpBuilder &b, Location loc, ValueRange loopIvs) {
84
85
ivs.assign (loopIvs.begin (), loopIvs.end ());
85
86
auto activeIvs = llvm::makeArrayRef (ivs).take_back (rank);
@@ -142,17 +143,13 @@ static void insertCopies(Region &region, Location loc, Value from, Value to) {
142
143
assert (llvm::hasSingleElement (region) &&
143
144
" unstructured control flow not supported" );
144
145
145
- OpBuilder b (region.getContext ());
146
- b.setInsertionPointToStart (®ion.front ());
147
-
148
- ScopedContext edscContext (b, loc);
149
- MemRefBoundsCapture fromBoundsCapture (from);
150
- insertCopyLoops (b, loc, fromBoundsCapture, from, to);
151
- b.create <gpu::BarrierOp>(loc);
146
+ auto b = ImplicitLocOpBuilder::atBlockBegin (loc, ®ion.front ());
147
+ insertCopyLoops (b, from, to);
148
+ b.create <gpu::BarrierOp>();
152
149
153
150
b.setInsertionPoint (®ion.front ().back ());
154
- b.create <gpu::BarrierOp>(loc );
155
- insertCopyLoops (b, loc, fromBoundsCapture, to, from);
151
+ b.create <gpu::BarrierOp>();
152
+ insertCopyLoops (b, to, from);
156
153
}
157
154
158
155
/// Promotes a function argument to workgroup memory in the given function. The
0 commit comments