[mlir][Linalg] Avoid using scf.parallel for non-parallel loops in Linalg ops.

Modify the loop nest builder that generates scf.parallel loops so that
scf.parallel ops are not generated for non-parallel iterator types in
Linalg operations. The existing implementation incorrectly generated
scf.parallel for all tiled loops. This is rectified by refactoring the
logic used while lowering to loops, which already accounted for iterator
types, into a shared utility and reusing it.

Differential Revision: https://reviews.llvm.org/D80188
MaheshRavishankar committed May 27, 2020
1 parent a714148 commit 5759e47
Showing 8 changed files with 312 additions and 78 deletions.
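The gist of the change, as a hedged sketch (a hypothetical matmul-like op; tile sizes, bounds, and value names are for illustration only, not taken from this commit): with iterator_types = ["parallel", "parallel", "reduction"], tiling with the scf.parallel loop type previously emitted

  scf.parallel (%i, %j, %k) = (%c0, %c0, %c0) to (%M, %N, %K)
                              step (%c2, %c3, %c4) { ... }

which parallelizes the reduction dimension %k and is unsafe for the accumulation. After this change, only the outer parallel dimensions use scf.parallel and the reduction dimension lowers to a sequential scf.for:

  scf.parallel (%i, %j) = (%c0, %c0) to (%M, %N) step (%c2, %c3) {
    scf.for %k = %c0 to %K step %c4 { ... }
  }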
31 changes: 31 additions & 0 deletions mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
@@ -9,14 +9,21 @@
#ifndef MLIR_DIALECT_LINALG_UTILS_H_
#define MLIR_DIALECT_LINALG_UTILS_H_

#include "mlir/Dialect/Affine/EDSC/Intrinsics.h"
#include "mlir/Dialect/Linalg/EDSC/Builders.h"
#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"

#include "llvm/ADT/SetVector.h"

using mlir::edsc::intrinsics::AffineIndexedValue;
using mlir::edsc::intrinsics::StdIndexedValue;

namespace mlir {
class AffineExpr;
class AffineForOp;
class AffineMap;
class OperationFolder;
class PatternRewriter;
@@ -49,6 +56,15 @@ struct RegionMatcher {
static Optional<BinaryOpKind> matchAsScalarBinaryOp(GenericOp op);
};

/// Checks if an iterator_type attribute is parallel.
bool isParallelIteratorType(Attribute attr);

/// Checks if an iterator_type attribute is reduction.
bool isReductionIteratorType(Attribute attr);

/// Checks if an iterator_type attribute is window.
bool isWindowIteratorType(Attribute attr);

/// Checks whether the specific `producer` is the last write to exactly the
/// whole `consumedView`. This checks structural dominance, that the dependence
/// is a RAW without any interleaved write to any piece of `consumedView`.
@@ -141,6 +157,21 @@ void applyPermutationToVector(SmallVector<T, N> &inVec,
inVec = auxVec;
}

/// Utility class used to generate nested loops with ranges described by
/// `loopRanges` and loop types described by `iteratorTypes`. `allIvs` is
/// populated with induction variables for all generated loops on return, with
/// `fun` used to generate the body of the innermost loop.
template <typename LoopTy>
struct GenerateLoopNest {
using IndexedValueTy =
typename std::conditional<std::is_same<LoopTy, AffineForOp>::value,
AffineIndexedValue, StdIndexedValue>::type;
static void doit(MutableArrayRef<Value> allIvs,
ArrayRef<SubViewOp::Range> loopRanges,
ArrayRef<Attribute> iteratorTypes,
std::function<void(void)> fun);
};

} // namespace linalg
} // namespace mlir

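A sketch of what the three new predicates classify, using a hypothetical iterator_types attribute (the behavior follows the implementations added in Utils.cpp below):

  // Given: iterator_types = ["parallel", "reduction", "window"]
  //   isParallelIteratorType(attr)  -> true only for "parallel"
  //   isReductionIteratorType(attr) -> true only for "reduction"
  //   isWindowIteratorType(attr)    -> true only for "window"
  // An entry that is not a StringAttr returns false from all three.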
81 changes: 8 additions & 73 deletions mlir/lib/Dialect/Linalg/Transforms/Loops.cpp
@@ -487,80 +487,9 @@ class LinalgScopedEmitter<IndexedValueType, IndexedGenericOp> {
}
};

namespace {
/// Helper struct to generate the loop nest for the op. This is factored out
/// here to be able to partially specialize it for different LoopTy.
template <typename LoopTy, typename ConcreteOpTy>
class GenerateLoopNest {
public:
using IndexedValueTy =
typename std::conditional<std::is_same<LoopTy, AffineForOp>::value,
AffineIndexedValue, StdIndexedValue>::type;
static void doit(ConcreteOpTy linalgOp, ArrayRef<SubViewOp::Range> loopRanges,
MutableArrayRef<Value> allIvs) {
GenericLoopNestRangeBuilder<LoopTy>(allIvs, loopRanges)([&] {
SmallVector<Value, 4> allIvValues(allIvs.begin(), allIvs.end());
LinalgScopedEmitter<IndexedValueTy,
ConcreteOpTy>::emitScalarImplementation(allIvValues,
linalgOp);
});
}
};

/// Generates loop nest using scf.parallel. scf.parallel is only used for the
/// outer parallel loops. All other loops are generated using scf.for
/// operation.
template <typename ConcreteOpTy>
class GenerateLoopNest<scf::ParallelOp, ConcreteOpTy> {
public:
using IndexedValueTy = StdIndexedValue;

static void doit(ConcreteOpTy linalgOp, ArrayRef<SubViewOp::Range> loopRanges,
MutableArrayRef<Value> allIvs) {
// Only generate scf.parallel for outer consecutive "parallel"
// iterator_types.
// TODO(ravishankarm): Generate scf.parallel for all "parallel" iterator
// types, not just the outer most ones. Also handle "reduction" iterator
// types.
auto nOuterPar = linalgOp.iterator_types()
.getValue()
.take_while([](Attribute attr) {
return attr.cast<StringAttr>().getValue() ==
getParallelIteratorTypeName();
})
.size();
// If there are no outer parallel loops, then the number of loop ops is the
// same as the number of loops, and they are all scf.for ops.
if (nOuterPar) {
GenericLoopNestRangeBuilder<scf::ParallelOp>(
allIvs.take_front(nOuterPar), loopRanges.take_front(nOuterPar))([&] {
GenericLoopNestRangeBuilder<scf::ForOp>(
allIvs.drop_front(nOuterPar),
loopRanges.drop_front(nOuterPar))([&] {
SmallVector<Value, 4> allIvValues(allIvs.begin(), allIvs.end());
LinalgScopedEmitter<StdIndexedValue, ConcreteOpTy>::
emitScalarImplementation(allIvValues, linalgOp);
});
});
} else {
// If there are no parallel loops then fall back to generating all scf.for
// operations.
GenericLoopNestRangeBuilder<scf::ForOp>(allIvs, loopRanges)([&] {
SmallVector<Value, 4> allIvValues(allIvs.begin(), allIvs.end());
LinalgScopedEmitter<StdIndexedValue,
ConcreteOpTy>::emitScalarImplementation(allIvValues,
linalgOp);
});
}
}
};
} // namespace

template <typename LoopTy, typename ConcreteOpTy>
Optional<LinalgLoops> linalgOpToLoopsImpl(Operation *op, OpBuilder &builder) {
using Impl = GenerateLoopNest<LoopTy, ConcreteOpTy>;
using IndexedValueTy =
typename GenerateLoopNest<LoopTy, ConcreteOpTy>::IndexedValueTy;
using IndexedValueTy = typename GenerateLoopNest<LoopTy>::IndexedValueTy;

ScopedContext scope(builder, op->getLoc());

@@ -591,7 +520,13 @@ Optional<LinalgLoops> linalgOpToLoopsImpl(Operation *op, OpBuilder &builder) {
emitLoopRanges(scope.getBuilderRef(), scope.getLocation(), invertedMap,
getViewSizes(builder, linalgOp));
assert(loopRanges.size() == allIvs.size());
Impl::doit(linalgOp, loopRanges, allIvs);
GenerateLoopNest<LoopTy>::doit(
allIvs, loopRanges, linalgOp.iterator_types().getValue(), [&] {
SmallVector<Value, 4> allIvValues(allIvs.begin(), allIvs.end());
LinalgScopedEmitter<IndexedValueTy,
ConcreteOpTy>::emitScalarImplementation(allIvValues,
linalgOp);
});
// Number of loop ops might be different from the number of ivs since some
// loops like affine.parallel and scf.parallel have multiple ivs.
llvm::SetVector<Operation *> loopSet;
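The removed specialization above only parallelized the outermost band of "parallel" iterators and emitted scf.for for everything after it (see its TODO). The shared GenerateLoopNest<scf::ParallelOp> added in Utils.cpp recurses past sequential iterators, so inner parallel bands are recovered as well. A hedged sketch for a hypothetical 3-d op with iterator_types = ["parallel", "reduction", "parallel"]:

  // Old lowering (trailing parallel loop left sequential):
  //   scf.parallel (%i) = ... { scf.for %j = ... { scf.for %k = ... } }
  // New lowering (inner parallel band recovered):
  //   scf.parallel (%i) = ... { scf.for %j = ... { scf.parallel (%k) = ... } }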
10 changes: 7 additions & 3 deletions mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp
@@ -376,16 +376,20 @@ Optional<TiledLinalgOp> static tileLinalgOpImpl(
// 3. Create the tiled loops.
LinalgOp res = op;
SmallVector<Value, 4> ivs(loopRanges.size());
GenericLoopNestRangeBuilder<LoopTy>(ivs, loopRanges)([&] {
SmallVector<Attribute, 4> iteratorTypes =
llvm::to_vector<4>(op.iterator_types().cast<ArrayAttr>().getValue());
if (!options.interchangeVector.empty())
applyPermutationToVector(iteratorTypes, options.interchangeVector);
GenerateLoopNest<LoopTy>::doit(ivs, loopRanges, iteratorTypes, [&] {
auto &b = ScopedContext::getBuilderRef();
auto loc = ScopedContext::getLocation();
SmallVector<Value, 4> ivValues(ivs.begin(), ivs.end());

// If we have to apply a permutation to the tiled loop nest, we have to
// reorder the induction variables. This permutation is the right one
// assuming that loopRanges have previously been permuted by
// (i,j,k)->(k,i,j). So this permutation should be the inversePermutation
// of that one: (d0,d1,d2)->(d2,d0,d1).
if (!options.interchangeVector.empty())
ivValues = applyMapToValues(b, loc, invPermutationMap, ivValues);

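Since iteratorTypes is now permuted alongside loopRanges, an interchange that rotates a reduction dimension outermost yields the correct loop kinds. A hedged sketch (hypothetical matmul, interchange (i,j,k)->(k,i,j), tile sizes 2, 3, 4; names illustrative only):

  // Permuted iterator_types become ["reduction", "parallel", "parallel"],
  // so the outermost tiled loop is sequential and the inner band parallel:
  scf.for %k = %c0 to %K step %c4 {
    scf.parallel (%i, %j) = (%c0, %c0) to (%M, %N) step (%c2, %c3) {
      ...
    }
  }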
89 changes: 89 additions & 0 deletions mlir/lib/Dialect/Linalg/Utils/Utils.cpp
@@ -14,6 +14,7 @@
#include "mlir/Dialect/Affine/IR/AffineOps.h"
#include "mlir/Dialect/Linalg/IR/LinalgOps.h"
#include "mlir/Dialect/Linalg/IR/LinalgTypes.h"
#include "mlir/Dialect/SCF/EDSC/Builders.h"
#include "mlir/Dialect/SCF/SCF.h"
#include "mlir/Dialect/StandardOps/IR/Ops.h"
#include "mlir/IR/AffineExpr.h"
@@ -101,3 +102,91 @@ mlir::linalg::getAssumedNonViewOperands(LinalgOp linalgOp) {
}
return res;
}

bool mlir::linalg::isParallelIteratorType(Attribute attr) {
if (auto strAttr = attr.dyn_cast<StringAttr>()) {
return strAttr.getValue() == getParallelIteratorTypeName();
}
return false;
}

bool mlir::linalg::isReductionIteratorType(Attribute attr) {
if (auto strAttr = attr.dyn_cast<StringAttr>()) {
return strAttr.getValue() == getReductionIteratorTypeName();
}
return false;
}

bool mlir::linalg::isWindowIteratorType(Attribute attr) {
if (auto strAttr = attr.dyn_cast<StringAttr>()) {
return strAttr.getValue() == getWindowIteratorTypeName();
}
return false;
}

/// Explicit instantiation of loop nest generator for different loop types.
template struct mlir::linalg::GenerateLoopNest<scf::ForOp>;
template struct mlir::linalg::GenerateLoopNest<scf::ParallelOp>;
template struct mlir::linalg::GenerateLoopNest<AffineForOp>;

/// Loop nest generation for scf.for and affine.for loop types: all loops in
/// the nest are generated as sequential loops, irrespective of the iterator
/// types.
template <>
void mlir::linalg::GenerateLoopNest<scf::ForOp>::doit(
MutableArrayRef<Value> allIvs, ArrayRef<SubViewOp::Range> loopRanges,
ArrayRef<Attribute> iteratorTypes, std::function<void(void)> fun) {
edsc::GenericLoopNestRangeBuilder<scf::ForOp>(allIvs, loopRanges)(fun);
}

template <>
void mlir::linalg::GenerateLoopNest<AffineForOp>::doit(
MutableArrayRef<Value> allIvs, ArrayRef<SubViewOp::Range> loopRanges,
ArrayRef<Attribute> iteratorTypes, std::function<void(void)> fun) {
edsc::GenericLoopNestRangeBuilder<AffineForOp>(allIvs, loopRanges)(fun);
}

/// Specialization of loop nest generator for scf.parallel loops to handle
/// iterator types that are not parallel. These are generated as sequential
/// loops.
template <>
void mlir::linalg::GenerateLoopNest<scf::ParallelOp>::doit(
MutableArrayRef<Value> allIvs, ArrayRef<SubViewOp::Range> loopRanges,
ArrayRef<Attribute> iteratorTypes, std::function<void(void)> fun) {
// Check if there is nothing to do here. This is also the recursion
// termination.
if (loopRanges.empty())
return;
size_t nOuterPar = iteratorTypes.take_front(loopRanges.size())
.take_while(isParallelIteratorType)
.size();
if (nOuterPar == 0 && loopRanges.size() == 1)
// Generate the sequential for loop for the remaining non-parallel loop.
return GenerateLoopNest<scf::ForOp>::doit(allIvs, loopRanges, iteratorTypes,
fun);
if (nOuterPar == 0) {
// The immediate outer loop is not parallel. Generate a scf.for op for this
// loop, but there might be subsequent loops that are parallel. Use
// recursion to find those.
auto nestedFn = [&]() {
GenerateLoopNest<scf::ParallelOp>::doit(allIvs.drop_front(),
loopRanges.drop_front(),
iteratorTypes.drop_front(), fun);
};
return GenerateLoopNest<scf::ForOp>::doit(allIvs[0], loopRanges[0],
iteratorTypes[0], nestedFn);
}
if (nOuterPar == loopRanges.size()) {
// All loops are parallel, so generate the scf.parallel op.
return edsc::GenericLoopNestRangeBuilder<scf::ParallelOp>(allIvs,
loopRanges)(fun);
}
// Generate scf.parallel for the outer parallel loops. The next inner loop is
// sequential, but there might be more parallel loops after that. So recurse
// into the same method.
auto nestedFn = [&]() {
GenerateLoopNest<scf::ParallelOp>::doit(
allIvs.drop_front(nOuterPar), loopRanges.drop_front(nOuterPar),
iteratorTypes.drop_front(nOuterPar), fun);
};
return GenerateLoopNest<scf::ParallelOp>::doit(
allIvs.take_front(nOuterPar), loopRanges.take_front(nOuterPar),
iteratorTypes.take_front(nOuterPar), nestedFn);
}
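A trace of this recursion for the mixed case exercised by the new test below, with iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"]:

  // nOuterPar = 2 -> scf.parallel (%iv0, %iv1), recurse on remaining 4 loops
  //   nOuterPar = 0 -> scf.for %iv2, recurse on remaining 3 loops
  //     nOuterPar = 2 -> scf.parallel (%iv3, %iv4), recurse on remaining loop
  //       nOuterPar = 0, single loop left -> scf.for %iv5, emit the body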
38 changes: 37 additions & 1 deletion mlir/test/Dialect/Linalg/parallel_loops.mlir
@@ -57,6 +57,42 @@ func @lower_outer_parallel(%A: memref<?x?x?x?xf32>, %B: memref<?x?x?xf32>) {
// CHECK-DAG: %[[D3:.*]] = dim %{{.*}}, 3
// CHECK: scf.parallel (%[[IV0:.*]], %[[IV1:.*]]) = (%[[C0]], %[[C0]]) to (%[[D0]], %[[D1]]) step (%[[C1]], %[[C1]])
// CHECK: scf.for %[[IV2:.*]] = %[[C0]] to %[[D2]] step %[[C1]]
// CHECK: scf.for %[[IV3:.*]] = %[[C0]] to %[[D3]] step %[[C1]]
// CHECK: scf.parallel (%[[IV3:.*]]) = (%[[C0]]) to (%[[D3]]) step (%[[C1]])
// CHECK: load %{{.*}}[%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]]
// CHECK: store %{{.*}}, %{{.*}}[%[[IV0]], %[[IV1]], %[[IV3]]]

// -----

#accesses = [
affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d1, d2, d3, d4, d5)>,
affine_map<(d0, d1, d2, d3, d4, d5) -> (d0, d2, d4, d5)>
]
#trait = {
args_in = 1,
args_out = 1,
iterator_types = ["parallel", "parallel", "reduction", "parallel", "parallel", "reduction"],
indexing_maps = #accesses
}

func @lower_mixed_parallel(%A: memref<?x?x?x?x?x?xf32>, %B: memref<?x?x?x?xf32>) {
linalg.generic #trait %A, %B {
^bb0(%a: f32, %b: f32):
linalg.yield %a: f32
} : memref<?x?x?x?x?x?xf32>, memref<?x?x?x?xf32>
return
}
// CHECK-LABEL: @lower_mixed_parallel
// CHECK-DAG: %[[C0:.*]] = constant 0
// CHECK-DAG: %[[C1:.*]] = constant 1
// CHECK-DAG: %[[D0:.*]] = dim %{{.*}}, 0
// CHECK-DAG: %[[D1:.*]] = dim %{{.*}}, 1
// CHECK-DAG: %[[D2:.*]] = dim %{{.*}}, 2
// CHECK-DAG: %[[D3:.*]] = dim %{{.*}}, 3
// CHECK-DAG: %[[D4:.*]] = dim %{{.*}}, 4
// CHECK-DAG: %[[D5:.*]] = dim %{{.*}}, 5
// CHECK: scf.parallel (%[[IV0:.*]], %[[IV1:.*]]) = (%[[C0]], %[[C0]]) to (%[[D0]], %[[D1]]) step (%[[C1]], %[[C1]])
// CHECK: scf.for %[[IV2:.*]] = %[[C0]] to %[[D2]] step %[[C1]]
// CHECK: scf.parallel (%[[IV3:.*]], %[[IV4:.*]]) = (%[[C0]], %[[C0]]) to (%[[D3]], %[[D4]]) step (%[[C1]], %[[C1]])
// CHECK: scf.for %[[IV5:.*]] = %[[C0]] to %[[D5]] step %[[C1]]
// CHECK: load %{{.*}}[%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]], %[[IV4]], %[[IV5]]]
// CHECK: store %{{.*}}, %{{.*}}[%[[IV0]], %[[IV2]], %[[IV4]], %[[IV5]]]