-
Notifications
You must be signed in to change notification settings - Fork 15k
Revert "[mlir][scf] Add parallelLoopUnrollByFactors()" #164949
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This reverts commit 86a2073.
|
@llvm/pr-subscribers-mlir-scf @llvm/pr-subscribers-mlir-core Author: None (fabrizio-indirli). Changes: Reverts llvm/llvm-project#163806. Patch is 27.19 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/164949.diff 7 Files Affected:
diff --git a/mlir/include/mlir/Dialect/SCF/Utils/Utils.h b/mlir/include/mlir/Dialect/SCF/Utils/Utils.h
index cdc52f4f3668c..ecd829ed14add 100644
--- a/mlir/include/mlir/Dialect/SCF/Utils/Utils.h
+++ b/mlir/include/mlir/Dialect/SCF/Utils/Utils.h
@@ -221,45 +221,6 @@ FailureOr<scf::ForallOp> normalizeForallOp(RewriterBase &rewriter,
/// 4. Each region iter arg and result has exactly one use
bool isPerfectlyNestedForLoops(MutableArrayRef<LoopLikeOpInterface> loops);
-/// Generate unrolled copies of an scf loop's 'loopBodyBlock', with 'iterArgs'
-/// and 'yieldedValues' as the block arguments and yielded values of the loop.
-/// The content of the loop body is replicated 'unrollFactor' times, calling
-/// 'ivRemapFn' to remap 'iv' for each unrolled body. If specified, annotates
-/// the Ops in each unrolled iteration using annotateFn. If provided,
-/// 'clonedToSrcOpsMap' is populated with the mappings from the cloned ops to
-/// the original op.
-void generateUnrolledLoop(
- Block *loopBodyBlock, Value iv, uint64_t unrollFactor,
- function_ref<Value(unsigned, Value, OpBuilder)> ivRemapFn,
- function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn,
- ValueRange iterArgs, ValueRange yieldedValues,
- IRMapping *clonedToSrcOpsMap = nullptr);
-
-/// Unroll this scf::Parallel loop by the specified unroll factors. Returns the
-/// unrolled loop if the unroll succeded; otherwise returns failure if the loop
-/// cannot be unrolled either due to restrictions or to invalid unroll factors.
-/// Requires positive loop bounds and step. If specified, annotates the Ops in
-/// each unrolled iteration by applying `annotateFn`.
-/// If provided, 'clonedToSrcOpsMap' is populated with the mappings from the
-/// cloned ops to the original op.
-FailureOr<scf::ParallelOp> parallelLoopUnrollByFactors(
- scf::ParallelOp op, ArrayRef<uint64_t> unrollFactors,
- RewriterBase &rewriter,
- function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn = nullptr,
- IRMapping *clonedToSrcOpsMap = nullptr);
-
-/// Get constant trip counts for each of the induction variables of the given
-/// loop operation. If any of the loop's trip counts is not constant, return an
-/// empty vector.
-llvm::SmallVector<int64_t>
-getConstLoopTripCounts(mlir::LoopLikeOpInterface loopOp);
-
-namespace scf {
-/// Helper function to compute the difference between two values. This is used
-/// by the loop implementations to compute the trip count.
-std::optional<llvm::APSInt> computeUbMinusLb(Value lb, Value ub, bool isSigned);
-} // namespace scf
-
} // namespace mlir
#endif // MLIR_DIALECT_SCF_UTILS_UTILS_H_
diff --git a/mlir/lib/Dialect/SCF/IR/SCF.cpp b/mlir/lib/Dialect/SCF/IR/SCF.cpp
index 395b52fe46d25..744a5951330a3 100644
--- a/mlir/lib/Dialect/SCF/IR/SCF.cpp
+++ b/mlir/lib/Dialect/SCF/IR/SCF.cpp
@@ -15,7 +15,6 @@
#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
#include "mlir/Dialect/MemRef/IR/MemRef.h"
#include "mlir/Dialect/SCF/IR/DeviceMappingInterface.h"
-#include "mlir/Dialect/SCF/Utils/Utils.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/IR/BuiltinAttributes.h"
#include "mlir/IR/IRMapping.h"
@@ -112,6 +111,24 @@ static TerminatorTy verifyAndGetTerminator(Operation *op, Region &region,
return nullptr;
}
+/// Helper function to compute the difference between two values. This is used
+/// by the loop implementations to compute the trip count.
+static std::optional<llvm::APSInt> computeUbMinusLb(Value lb, Value ub,
+ bool isSigned) {
+ llvm::APSInt diff;
+ auto addOp = ub.getDefiningOp<arith::AddIOp>();
+ if (!addOp)
+ return std::nullopt;
+ if ((isSigned && !addOp.hasNoSignedWrap()) ||
+ (!isSigned && !addOp.hasNoUnsignedWrap()))
+ return std::nullopt;
+
+ if (addOp.getLhs() != lb ||
+ !matchPattern(addOp.getRhs(), m_ConstantInt(&diff)))
+ return std::nullopt;
+ return diff;
+}
+
//===----------------------------------------------------------------------===//
// ExecuteRegionOp
//===----------------------------------------------------------------------===//
diff --git a/mlir/lib/Dialect/SCF/Utils/Utils.cpp b/mlir/lib/Dialect/SCF/Utils/Utils.cpp
index 2d989d50bb8ac..10eae8906ce31 100644
--- a/mlir/lib/Dialect/SCF/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/SCF/Utils/Utils.cpp
@@ -291,61 +291,47 @@ static Value ceilDivPositive(OpBuilder &builder, Location loc, Value dividend,
return arith::DivUIOp::create(builder, loc, sum, divisor);
}
-void mlir::generateUnrolledLoop(
- Block *loopBodyBlock, Value iv, uint64_t unrollFactor,
+/// Generates unrolled copies of scf::ForOp 'loopBodyBlock', with
+/// associated 'forOpIV' by 'unrollFactor', calling 'ivRemapFn' to remap
+/// 'forOpIV' for each unrolled body. If specified, annotates the Ops in each
+/// unrolled iteration using annotateFn.
+static void generateUnrolledLoop(
+ Block *loopBodyBlock, Value forOpIV, uint64_t unrollFactor,
function_ref<Value(unsigned, Value, OpBuilder)> ivRemapFn,
function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn,
- ValueRange iterArgs, ValueRange yieldedValues,
- IRMapping *clonedToSrcOpsMap) {
-
- // Check if the op was cloned from another source op, and return it if found
- // (or the same op if not found)
- auto findOriginalSrcOp =
- [](Operation *op, const IRMapping &clonedToSrcOpsMap) -> Operation * {
- Operation *srcOp = op;
- // If the source op derives from another op: traverse the chain to find the
- // original source op
- while (srcOp && clonedToSrcOpsMap.contains(srcOp))
- srcOp = clonedToSrcOpsMap.lookup(srcOp);
- return srcOp;
- };
-
+ ValueRange iterArgs, ValueRange yieldedValues) {
// Builder to insert unrolled bodies just before the terminator of the body of
- // the loop.
+ // 'forOp'.
auto builder = OpBuilder::atBlockTerminator(loopBodyBlock);
- static const auto noopAnnotateFn = [](unsigned, Operation *, OpBuilder) {};
+ constexpr auto defaultAnnotateFn = [](unsigned, Operation *, OpBuilder) {};
if (!annotateFn)
- annotateFn = noopAnnotateFn;
+ annotateFn = defaultAnnotateFn;
// Keep a pointer to the last non-terminator operation in the original block
// so that we know what to clone (since we are doing this in-place).
Block::iterator srcBlockEnd = std::prev(loopBodyBlock->end(), 2);
- // Unroll the contents of the loop body (append unrollFactor - 1 additional
- // copies).
+ // Unroll the contents of 'forOp' (append unrollFactor - 1 additional copies).
SmallVector<Value, 4> lastYielded(yieldedValues);
for (unsigned i = 1; i < unrollFactor; i++) {
- // Prepare operand map.
IRMapping operandMap;
+
+ // Prepare operand map.
operandMap.map(iterArgs, lastYielded);
// If the induction variable is used, create a remapping to the value for
// this unrolled instance.
- if (!iv.use_empty()) {
- Value ivUnroll = ivRemapFn(i, iv, builder);
- operandMap.map(iv, ivUnroll);
+ if (!forOpIV.use_empty()) {
+ Value ivUnroll = ivRemapFn(i, forOpIV, builder);
+ operandMap.map(forOpIV, ivUnroll);
}
// Clone the original body of 'forOp'.
for (auto it = loopBodyBlock->begin(); it != std::next(srcBlockEnd); it++) {
- Operation *srcOp = &(*it);
- Operation *clonedOp = builder.clone(*srcOp, operandMap);
+ Operation *clonedOp = builder.clone(*it, operandMap);
annotateFn(i, clonedOp, builder);
- if (clonedToSrcOpsMap)
- clonedToSrcOpsMap->map(clonedOp,
- findOriginalSrcOp(srcOp, *clonedToSrcOpsMap));
}
// Update yielded values.
@@ -1558,116 +1544,3 @@ bool mlir::isPerfectlyNestedForLoops(
}
return true;
}
-
-std::optional<llvm::APSInt> mlir::scf::computeUbMinusLb(Value lb, Value ub,
- bool isSigned) {
- llvm::APSInt diff;
- auto addOp = ub.getDefiningOp<arith::AddIOp>();
- if (!addOp)
- return std::nullopt;
- if ((isSigned && !addOp.hasNoSignedWrap()) ||
- (!isSigned && !addOp.hasNoUnsignedWrap()))
- return std::nullopt;
-
- if (addOp.getLhs() != lb ||
- !matchPattern(addOp.getRhs(), m_ConstantInt(&diff)))
- return std::nullopt;
- return diff;
-}
-
-llvm::SmallVector<int64_t>
-mlir::getConstLoopTripCounts(mlir::LoopLikeOpInterface loopOp) {
- std::optional<SmallVector<OpFoldResult>> loBnds = loopOp.getLoopLowerBounds();
- std::optional<SmallVector<OpFoldResult>> upBnds = loopOp.getLoopUpperBounds();
- std::optional<SmallVector<OpFoldResult>> steps = loopOp.getLoopSteps();
- if (!loBnds || !upBnds || !steps)
- return {};
- llvm::SmallVector<int64_t> tripCounts;
- for (auto [lb, ub, step] : llvm::zip(*loBnds, *upBnds, *steps)) {
- std::optional<llvm::APInt> numIter = constantTripCount(
- lb, ub, step, /*isSigned=*/true, scf::computeUbMinusLb);
- if (!numIter)
- return {};
- tripCounts.push_back(numIter->getSExtValue());
- }
- return tripCounts;
-}
-
-FailureOr<scf::ParallelOp> mlir::parallelLoopUnrollByFactors(
- scf::ParallelOp op, ArrayRef<uint64_t> unrollFactors,
- RewriterBase &rewriter,
- function_ref<void(unsigned, Operation *, OpBuilder)> annotateFn,
- IRMapping *clonedToSrcOpsMap) {
- const unsigned numLoops = op.getNumLoops();
- assert(llvm::none_of(unrollFactors, [](uint64_t f) { return f == 0; }) &&
- "Expected positive unroll factors");
- assert((!unrollFactors.empty() && (unrollFactors.size() <= numLoops)) &&
- "Expected non-empty unroll factors of size <= to the number of loops");
-
- // Bail out if no valid unroll factors were provided
- if (llvm::all_of(unrollFactors, [](uint64_t f) { return f == 1; }))
- return rewriter.notifyMatchFailure(
- op, "Unrolling not applied if all factors are 1");
-
- // Return if the loop body is empty.
- if (llvm::hasSingleElement(op.getBody()->getOperations()))
- return rewriter.notifyMatchFailure(op, "Cannot unroll an empty loop body");
-
- // If the provided unroll factors do not cover all the loop dims, they are
- // applied to the inner loop dimensions.
- const unsigned firstLoopDimIdx = numLoops - unrollFactors.size();
-
- // Make sure that the unroll factors divide the iteration space evenly
- // TODO: Support unrolling loops with dynamic iteration spaces.
- const llvm::SmallVector<int64_t> tripCounts = getConstLoopTripCounts(op);
- if (tripCounts.empty())
- return rewriter.notifyMatchFailure(
- op, "Failed to compute constant trip counts for the loop. Note that "
- "dynamic loop sizes are not supported.");
-
- for (unsigned dimIdx = firstLoopDimIdx; dimIdx < numLoops; dimIdx++) {
- const uint64_t unrollFactor = unrollFactors[dimIdx - firstLoopDimIdx];
- if (tripCounts[dimIdx] % unrollFactor)
- return rewriter.notifyMatchFailure(
- op, "Unroll factors don't divide the iteration space evenly");
- }
-
- std::optional<SmallVector<OpFoldResult>> maybeFoldSteps = op.getLoopSteps();
- if (!maybeFoldSteps)
- return rewriter.notifyMatchFailure(op, "Failed to retrieve loop steps");
- llvm::SmallVector<size_t> steps{};
- for (auto step : *maybeFoldSteps)
- steps.push_back(static_cast<size_t>(*getConstantIntValue(step)));
-
- for (unsigned dimIdx = firstLoopDimIdx; dimIdx < numLoops; dimIdx++) {
- const uint64_t unrollFactor = unrollFactors[dimIdx - firstLoopDimIdx];
- if (unrollFactor == 1)
- continue;
- const size_t origStep = steps[dimIdx];
- const int64_t newStep = origStep * unrollFactor;
- IRMapping clonedToSrcOpsMap;
-
- ValueRange iterArgs = ValueRange(op.getRegionIterArgs());
- auto yieldedValues = op.getBody()->getTerminator()->getOperands();
-
- generateUnrolledLoop(
- op.getBody(), op.getInductionVars()[dimIdx], unrollFactor,
- [&](unsigned i, Value iv, OpBuilder b) {
- // iv' = iv + step * i;
- const AffineExpr expr = b.getAffineDimExpr(0) + (origStep * i);
- const auto map =
- b.getDimIdentityMap().dropResult(0).insertResult(expr, 0);
- return affine::AffineApplyOp::create(b, iv.getLoc(), map,
- ValueRange{iv});
- },
- /*annotateFn*/ annotateFn, iterArgs, yieldedValues, &clonedToSrcOpsMap);
-
- // Update loop step
- auto prevInsertPoint = rewriter.saveInsertionPoint();
- rewriter.setInsertionPoint(op);
- op.getStepMutable()[dimIdx].assign(
- arith::ConstantIndexOp::create(rewriter, op.getLoc(), newStep));
- rewriter.restoreInsertionPoint(prevInsertPoint);
- }
- return op;
-}
diff --git a/mlir/test/Dialect/SCF/parallel-loop-unroll.mlir b/mlir/test/Dialect/SCF/parallel-loop-unroll.mlir
deleted file mode 100644
index 12b502e996c60..0000000000000
--- a/mlir/test/Dialect/SCF/parallel-loop-unroll.mlir
+++ /dev/null
@@ -1,171 +0,0 @@
-// RUN: mlir-opt %s -test-parallel-loop-unrolling='unroll-factors=1,2' -split-input-file | FileCheck %s
-// RUN: mlir-opt %s -test-parallel-loop-unrolling='unroll-factors=1,2 loop-depth=1' -split-input-file | FileCheck %s --check-prefix CHECK-UNROLL-INNER
-// RUN: mlir-opt %s -test-parallel-loop-unrolling='unroll-factors=3,1' -split-input-file | FileCheck %s --check-prefix CHECK-UNROLL-BY-3
-
-func.func @unroll_simple_parallel_loop(%src: memref<1x16x12xf32>, %dst: memref<1x16x12xf32>) {
- %c12 = arith.constant 12 : index
- %c16 = arith.constant 16 : index
- %c0 = arith.constant 0 : index
- %c1 = arith.constant 1 : index
- scf.parallel (%arg2, %arg3, %arg4) = (%c0, %c0, %c0) to (%c1, %c16, %c12) step (%c1, %c1, %c1) {
- %read = memref.load %src[%arg2, %arg3, %arg4] : memref<1x16x12xf32>
- memref.store %read, %dst[%arg2, %arg3, %arg4] : memref<1x16x12xf32>
- scf.reduce
- }
- return
-}
-
-// CHECK-LABEL: func @unroll_simple_parallel_loop
-// CHECK-SAME: ([[ARG0:%.*]]: memref<1x16x12xf32>, [[ARG1:%.*]]: memref<1x16x12xf32>)
-// CHECK-DAG: [[C0:%.*]] = arith.constant 0 : index
-// CHECK-DAG: [[C1:%.*]] = arith.constant 1 : index
-// CHECK-DAG: [[C2:%.*]] = arith.constant 2 : index
-// CHECK-DAG: [[C12:%.*]] = arith.constant 12 : index
-// CHECK-DAG: [[C16:%.*]] = arith.constant 16 : index
-// CHECK: scf.parallel ([[IV0:%.*]], [[IV1:%.*]], [[IV2:%.*]]) = ([[C0]], [[C0]], [[C0]]) to ([[C1]], [[C16]], [[C12]]) step ([[C1]], [[C1]], [[C2]])
-// CHECK: [[LOADED1:%.*]] = memref.load [[ARG0]][[[IV0]], [[IV1]], [[IV2]]] : memref<1x16x12xf32>
-// CHECK: memref.store [[LOADED1]], [[ARG1]][[[IV0]], [[IV1]], [[IV2]]] : memref<1x16x12xf32>
-// CHECK: [[UNR_IV2:%.*]] = affine.apply {{.*}}([[IV2]])
-// CHECK: [[LOADED2:%.*]] = memref.load [[ARG0]][[[IV0]], [[IV1]], [[UNR_IV2]]] : memref<1x16x12xf32>
-// CHECK: memref.store [[LOADED2]], [[ARG1]][[[IV0]], [[IV1]], [[UNR_IV2]]] : memref<1x16x12xf32>
-
-// -----
-
-func.func @negative_unroll_factors_dont_divide_evenly(%src: memref<1x16x12xf32>, %dst: memref<1x16x12xf32>) {
- %c12 = arith.constant 12 : index
- %c16 = arith.constant 16 : index
- %c0 = arith.constant 0 : index
- %c1 = arith.constant 1 : index
- scf.parallel (%arg2, %arg3, %arg4) = (%c0, %c0, %c0) to (%c1, %c16, %c12) step (%c1, %c1, %c1) {
- %read = memref.load %src[%arg2, %arg3, %arg4] : memref<1x16x12xf32>
- memref.store %read, %dst[%arg2, %arg3, %arg4] : memref<1x16x12xf32>
- scf.reduce
- }
- return
-}
-
-// CHECK-UNROLL-BY-3-LABEL: func @negative_unroll_factors_dont_divide_evenly
-// CHECK-UNROLL-BY-3-SAME: ([[ARG0:%.*]]: memref<1x16x12xf32>, [[ARG1:%.*]]: memref<1x16x12xf32>)
-// CHECK-UNROLL-BY-3: [[C1:%.*]] = arith.constant 1 : index
-// CHECK-UNROLL-BY-3: scf.parallel ([[IV0:%.*]], [[IV1:%.*]], [[IV2:%.*]]) = {{.*}} step ([[C1]], [[C1]], [[C1]])
-// CHECK-UNROLL-BY-3: [[LOADED:%.*]] = memref.load [[ARG0]][[[IV0]], [[IV1]], [[IV2]]] : memref<1x16x12xf32>
-// CHECK-UNROLL-BY-3: memref.store [[LOADED]], [[ARG1]][[[IV0]], [[IV1]], [[IV2]]] : memref<1x16x12xf32>
-// CHECK-UNROLL-BY-3-NOT: affine.apply
-// CHECK-UNROLL-BY-3-NOT: memref.load
-// CHECK-UNROLL-BY-3-NOT: memref.store
-
-// -----
-
-func.func @unroll_outer_nested_parallel_loop(%src: memref<5x16x12x4x4xf32>, %dst: memref<5x16x12x4x4xf32>) {
- %c4 = arith.constant 4 : index
- %c12 = arith.constant 12 : index
- %c16 = arith.constant 16 : index
- %c5 = arith.constant 5 : index
- %c0 = arith.constant 0 : index
- %c1 = arith.constant 1 : index
- scf.parallel (%arg3, %arg4, %arg5) = (%c0, %c0, %c0) to (%c5, %c16, %c12) step (%c1, %c1, %c1) {
- scf.parallel (%arg6, %arg7) = (%c0, %c0) to (%c4, %c4) step (%c1, %c1) {
- %0 = affine.apply affine_map<(d0, d1) -> (d0 + (d1 floordiv 4) * 4)>(%arg4, %arg6)
- %1 = affine.apply affine_map<(d0, d1) -> (d0 + (d1 floordiv 4) * 4)>(%arg5, %arg7)
- %subv_in = memref.subview %src[%arg3, %0, %1, 0, 0] [1, 1, 1, 4, 4] [1, 1, 1, 1, 1] : memref<5x16x12x4x4xf32> to memref<4x4xf32, strided<[4, 1], offset: ?>>
- %subv_out = memref.subview %dst[%arg3, %0, %1, 0, 0] [1, 1, 1, 4, 4] [1, 1, 1, 1, 1] : memref<5x16x12x4x4xf32> to memref<4x4xf32, strided<[4, 1], offset: ?>>
- linalg.erf ins(%subv_in : memref<4x4xf32, strided<[4, 1], offset: ?>>) outs(%subv_out : memref<4x4xf32, strided<[4, 1], offset: ?>>)
- scf.reduce
- }
- scf.reduce
- }
- return
-}
-
-// CHECK-UNROLL-BY-3-LABEL: func @unroll_outer_nested_parallel_loop
-// CHECK-LABEL: func @unroll_outer_nested_parallel_loop
-// CHECK-SAME: ([[ARG0:%.*]]: memref<5x16x12x4x4xf32>, [[ARG1:%.*]]: memref<5x16x12x4x4xf32>)
-// CHECK-DAG: [[C0:%.*]] = arith.constant 0 : index
-// CHECK-DAG: [[C1:%.*]] = arith.constant 1 : index
-// CHECK-DAG: [[C2:%.*]] = arith.constant 2 : index
-// CHECK-DAG: [[C4:%.*]] = arith.constant 4 : index
-// CHECK-DAG: [[C5:%.*]] = arith.constant 5 : index
-// CHECK-DAG: [[C12:%.*]] = arith.constant 12 : index
-// CHECK-DAG: [[C16:%.*]] = arith.constant 16 : index
-// CHECK: scf.parallel ([[OUTV0:%.*]], [[OUTV1:%.*]], [[OUTV2:%.*]]) = ([[C0]], [[C0]], [[C0]]) to ([[C5]], [[C16]], [[C12]]) step ([[C1]], [[C1]], [[C2]])
-// CHECK: scf.parallel ([[INV0:%.*]], [[INV1:%.*]]) = ([[C0]], [[C0]]) to ([[C4]], [[C4]]) step ([[C1]], [[C1]])
-// CHECK: affine.apply {{.*}}([[OUTV1]], [[INV0]])
-// CHECK: affine.apply {{.*}}([[OUTV2]], [[INV1]])
-// CHECK: linalg.erf
-
-// CHECK: [[UNR_OUTV2:%.*]] = affine.apply {{.*}}([[OUTV2]])
-// CHECK: scf.parallel ([[INV0B:%.*]], [[INV1B:%.*]]) = ([[C0]], [[C0]]) to ([[C4]], [[C4]]) step ([[C1]], [[C1]])
-// CHECK: affine.apply {{.*}}([[OUTV1]], [[INV0B]])
-// CHECK: affine.apply {{.*}}([[UNR_OUTV2]], [[INV1B]])
-// CHECK: linalg.erf
-
-// -----
-
-func.func @negative_unroll_dynamic_parallel_loop(%src: memref<1x16x12xf32>, %dst: memref<1x16x12xf32>, %ub3: index) {
- %c12 = arith.constant 12 : index
- %c16 = arith.constant 16 : index
- %c0 = arith.constant 0 : index
- %c1 = arith.constant 1 : index
- scf.parallel (%arg2, %arg3, %arg4) = (%c0, %c0, %c0) to (%c1, %c16, %ub3) step (%c1, %c1, %c1) {
- %read = memref.load %src[%arg2, %arg3, %arg4] : memref<1x16x12xf32>
- memref.store %read, %dst[%arg2, %arg3, %arg4] : memref<1x16x12xf32>
- scf.reduce
- }
- return
-}
-
-// CHECK-LABEL: func @negative_unroll_dynamic_parallel_loop
-// CHECK-SAME: ([[ARG0:%.*]]: memref<1x16x12xf32>, [[ARG1:%.*]]: memref<1x16x12xf32>, [[UB3:%.*]]: index)
-// CHECK-DAG: [[C0:%.*]] = arith.constant 0 : index
-// CHECK-DAG: [[C1:%.*]] = arith.constant 1 : index
-// CHECK-DAG: [[C16:%.*]] = arith.constant 16 : index
-// CHECK: scf.parallel ([[IV0:%.*]], [[IV1:%.*]], [[IV2:%.*]]) = ([[C0]], [[C0]], [[C0]]) to ([[C1]], [[C16]], [[UB3]]) step ([[C1]], [[C1]], [[C1]])
-// CHECK: [[LOADED:%.*]] = memref.load [[ARG0]][[[IV0]], [[IV1]], [[IV2]]] : memref<1x16x12xf32>
-// CHECK: memref.store [[LOADED]], [[ARG1]][[[IV0]], [[IV1]], [[IV2]]] : memref<1x16x12xf32>
-// CHECK-NOT: affine.apply
-// CHECK-NOT: memref.load
-// CHECK-NOT: memref.store
-
-// -----
-
-func.func @unroll_inner_nested_parallel_loop(%src: memref<5x16x12x4x4xf32>, %dst: memref...
[truncated]
|
jplehr
approved these changes
Oct 24, 2025
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Reverts #163806 due to linking errors on the function
mlir::scf::computeUbMinusLb.