diff --git a/flang/include/flang/Optimizer/Transforms/Passes.h b/flang/include/flang/Optimizer/Transforms/Passes.h
index 6f5dff4687cbb..f83a1559fa016 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.h
+++ b/flang/include/flang/Optimizer/Transforms/Passes.h
@@ -53,6 +53,9 @@ std::unique_ptr<mlir::Pass> createVScaleAttrPass();
 std::unique_ptr<mlir::Pass>
 createVScaleAttrPass(std::pair<unsigned, unsigned> vscaleAttr);
 
+void populateFIRToSCFRewrites(mlir::RewritePatternSet &patterns,
+                              bool parallelUnordered = false);
+
 void populateCfgConversionRewrites(mlir::RewritePatternSet &patterns,
                                    bool forceLoopToExecuteOnce = false,
                                    bool setNSW = true);
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td
index bb2509b1747d5..0f613584c6e17 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.td
+++ b/flang/include/flang/Optimizer/Transforms/Passes.td
@@ -85,6 +85,10 @@ def FIRToSCFPass : Pass<"fir-to-scf"> {
   let dependentDialects = [
     "fir::FIROpsDialect", "mlir::scf::SCFDialect"
   ];
+  let options = [Option<"parallelUnordered", "parallel-unordered", "bool",
+                        /*default=*/"false",
+                        "Allow converting a fir.do_loop with the `unordered` "
+                        "attribute to scf.parallel (experimental).">];
 }
 
 def AnnotateConstantOperands : Pass<"annotate-constant"> {
diff --git a/flang/lib/Optimizer/Transforms/FIRToSCF.cpp b/flang/lib/Optimizer/Transforms/FIRToSCF.cpp
index 70d6ebbcb039c..e72ee333101f5 100644
--- a/flang/lib/Optimizer/Transforms/FIRToSCF.cpp
+++ b/flang/lib/Optimizer/Transforms/FIRToSCF.cpp
@@ -25,11 +25,18 @@ class FIRToSCFPass : public fir::impl::FIRToSCFPassBase<FIRToSCFPass> {
 struct DoLoopConversion : public mlir::OpRewritePattern<fir::DoLoopOp> {
   using OpRewritePattern::OpRewritePattern;
 
+  DoLoopConversion(mlir::MLIRContext *context,
+                   bool parallelUnorderedLoop = false,
+                   mlir::PatternBenefit benefit = 1)
+      : OpRewritePattern(context, benefit),
+        parallelUnorderedLoop(parallelUnorderedLoop) {}
+
   mlir::LogicalResult
   matchAndRewrite(fir::DoLoopOp doLoopOp,
                   mlir::PatternRewriter &rewriter) const override {
     mlir::Location loc = doLoopOp.getLoc();
     bool hasFinalValue = doLoopOp.getFinalValue().has_value();
+    bool isUnordered = doLoopOp.getUnordered().has_value();
 
     // Get loop values from the DoLoopOp
     mlir::Value low = doLoopOp.getLowerBound();
@@ -53,39 +60,54 @@ struct DoLoopConversion : public mlir::OpRewritePattern<fir::DoLoopOp> {
         mlir::arith::DivSIOp::create(rewriter, loc, distance, step);
     auto zero = mlir::arith::ConstantIndexOp::create(rewriter, loc, 0);
     auto one = mlir::arith::ConstantIndexOp::create(rewriter, loc, 1);
-    auto scfForOp =
-        mlir::scf::ForOp::create(rewriter, loc, zero, tripCount, one, iterArgs);
+    // Create the scf.for or scf.parallel operation
+    mlir::Operation *scfLoopOp = nullptr;
+    if (isUnordered && parallelUnorderedLoop) {
+      scfLoopOp = mlir::scf::ParallelOp::create(rewriter, loc, {zero},
+                                                {tripCount}, {one}, iterArgs);
+    } else {
+      scfLoopOp = mlir::scf::ForOp::create(rewriter, loc, zero, tripCount, one,
+                                           iterArgs);
+    }
+
+    // Move the body of the fir.do_loop to the scf.for or scf.parallel
     auto &loopOps = doLoopOp.getBody()->getOperations();
     auto resultOp =
         mlir::cast<fir::ResultOp>(doLoopOp.getBody()->getTerminator());
     auto results = resultOp.getOperands();
-    mlir::Block *loweredBody = scfForOp.getBody();
+    auto scfLoopLikeOp = mlir::cast<mlir::LoopLikeOpInterface>(scfLoopOp);
+    mlir::Block &scfLoopBody = scfLoopLikeOp.getLoopRegions().front()->front();
 
-    loweredBody->getOperations().splice(loweredBody->begin(), loopOps,
-                                        loopOps.begin(),
-                                        std::prev(loopOps.end()));
+    scfLoopBody.getOperations().splice(scfLoopBody.begin(), loopOps,
+                                       loopOps.begin(),
+                                       std::prev(loopOps.end()));
 
-    rewriter.setInsertionPointToStart(loweredBody);
+    rewriter.setInsertionPointToStart(&scfLoopBody);
     mlir::Value iv = mlir::arith::MulIOp::create(
-        rewriter, loc, scfForOp.getInductionVar(), step);
+        rewriter, loc, scfLoopLikeOp.getSingleInductionVar().value(), step);
     iv = mlir::arith::AddIOp::create(rewriter, loc, low, iv);
 
     if (!results.empty()) {
-      rewriter.setInsertionPointToEnd(loweredBody);
+      rewriter.setInsertionPointToEnd(&scfLoopBody);
       mlir::scf::YieldOp::create(rewriter, resultOp->getLoc(), results);
     }
     doLoopOp.getInductionVar().replaceAllUsesWith(iv);
-    rewriter.replaceAllUsesWith(doLoopOp.getRegionIterArgs(),
-                                hasFinalValue
-                                    ? scfForOp.getRegionIterArgs().drop_front()
-                                    : scfForOp.getRegionIterArgs());
-
-    // Copy all the attributes from the old to new op.
-    scfForOp->setAttrs(doLoopOp->getAttrs());
-    rewriter.replaceOp(doLoopOp, scfForOp);
+    rewriter.replaceAllUsesWith(
+        doLoopOp.getRegionIterArgs(),
+        hasFinalValue ? scfLoopLikeOp.getRegionIterArgs().drop_front()
+                      : scfLoopLikeOp.getRegionIterArgs());
+
+    // Copy the loop annotation from the fir.do_loop to the scf loop op.
+    if (auto ann = doLoopOp.getLoopAnnotation())
+      scfLoopOp->setAttr("loop_annotation", *ann);
+
+    rewriter.replaceOp(doLoopOp, scfLoopOp);
     return mlir::success();
   }
+
+private:
+  bool parallelUnorderedLoop;
 };
 
 struct IterWhileConversion : public mlir::OpRewritePattern<fir::IterWhileOp> {
@@ -197,10 +219,15 @@ struct IfConversion : public mlir::OpRewritePattern<fir::IfOp> {
 };
 } // namespace
 
+void fir::populateFIRToSCFRewrites(mlir::RewritePatternSet &patterns,
+                                   bool parallelUnordered) {
+  patterns.add<IterWhileConversion, IfConversion>(patterns.getContext());
+  patterns.add<DoLoopConversion>(patterns.getContext(), parallelUnordered);
+}
+
 void FIRToSCFPass::runOnOperation() {
   mlir::RewritePatternSet patterns(&getContext());
-  patterns.add<DoLoopConversion, IterWhileConversion, IfConversion>(
-      patterns.getContext());
+  fir::populateFIRToSCFRewrites(patterns, parallelUnordered);
   walkAndApplyPatterns(getOperation(), std::move(patterns));
 }
diff --git a/flang/test/Fir/FirToSCF/do-loop.fir b/flang/test/Fir/FirToSCF/do-loop.fir
index 812497c8d0c74..8862a4c2969e8 100644
--- a/flang/test/Fir/FirToSCF/do-loop.fir
+++ b/flang/test/Fir/FirToSCF/do-loop.fir
@@ -1,4 +1,5 @@
-// RUN: fir-opt %s --fir-to-scf | FileCheck %s
+// RUN: fir-opt %s --fir-to-scf --split-input-file | FileCheck %s --check-prefixes=CHECK,NO-PARALLEL
+// RUN: fir-opt %s --fir-to-scf='parallel-unordered' --split-input-file | FileCheck %s --check-prefixes=CHECK,PARALLEL
 
 // CHECK-LABEL: func.func @simple_loop(
 // CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<100xi32>>) {
@@ -31,6 +32,8 @@ func.func @simple_loop(%arg0: !fir.ref<!fir.array<100xi32>>) {
   return
 }
 
+// -----
+
 // CHECK-LABEL: func.func @loop_with_negtive_step(
 // CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<100xi32>>) {
 // CHECK: %[[VAL_0:.*]] = arith.constant 100 : index
@@ -64,6 +67,8 @@ func.func @loop_with_negtive_step(%arg0: !fir.ref<!fir.array<100xi32>>) {
   return
 }
 
+// -----
+
 // CHECK-LABEL: func.func @loop_with_results(
 // CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<100xi32>>,
 // CHECK-SAME: %[[ARG1:.*]]: !fir.ref<i32>) {
@@ -102,6 +107,8 @@ func.func @loop_with_results(%arg0: !fir.ref<!fir.array<100xi32>>, %arg1: !fir.r
   return
 }
 
+// -----
+
 // CHECK-LABEL: func.func @loop_with_final_value(
 // CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<100xi32>>,
 // CHECK-SAME: %[[ARG1:.*]]: !fir.ref<index>) {
@@ -146,6 +153,45 @@ func.func @loop_with_final_value(%arg0: !fir.ref<!fir.array<100xi32>>, %arg1: !f
   return
 }
 
+// -----
+
+// CHECK-LABEL: func.func @loop_with_unordered_attr(
+// CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<100xi32>>) {
+// CHECK: %[[CONSTANT_0:.*]] = arith.constant 1 : index
+// CHECK: %[[CONSTANT_1:.*]] = arith.constant 100 : index
+// CHECK: %[[SHAPE_0:.*]] = fir.shape %[[CONSTANT_1]] : (index) -> !fir.shape<1>
+// CHECK: %[[CONSTANT_2:.*]] = arith.constant 1 : i32
+// CHECK: %[[SUBI_0:.*]] = arith.subi %[[CONSTANT_1]], %[[CONSTANT_0]] : index
+// CHECK: %[[ADDI_0:.*]] = arith.addi %[[SUBI_0]], %[[CONSTANT_0]] : index
+// CHECK: %[[DIVSI_0:.*]] = arith.divsi %[[ADDI_0]], %[[CONSTANT_0]] : index
+// CHECK: %[[CONSTANT_3:.*]] = arith.constant 0 : index
+// CHECK: %[[CONSTANT_4:.*]] = arith.constant 1 : index
+// PARALLEL: scf.parallel (%[[VAL_0:.*]]) = (%[[CONSTANT_3]]) to (%[[DIVSI_0]]) step (%[[CONSTANT_4]]) {
+// NO-PARALLEL: scf.for %[[VAL_0:.*]] = %[[CONSTANT_3]] to %[[DIVSI_0]] step %[[CONSTANT_4]] {
+// CHECK: %[[MULI_0:.*]] = arith.muli %[[VAL_0]], %[[CONSTANT_0]] : index
+// CHECK: %[[ADDI_1:.*]] = arith.addi %[[CONSTANT_0]], %[[MULI_0]] : index
+// CHECK: %[[ARRAY_COOR_0:.*]] = fir.array_coor %[[ARG0]](%[[SHAPE_0]]) %[[ADDI_1]] : (!fir.ref<!fir.array<100xi32>>, !fir.shape<1>, index) -> !fir.ref<i32>
+// CHECK: fir.store %[[CONSTANT_2]] to %[[ARRAY_COOR_0]] : !fir.ref<i32>
+// PARALLEL: scf.reduce
+// CHECK: }
+// CHECK: return
+// CHECK: }
+func.func @loop_with_unordered_attr(%arg0: !fir.ref<!fir.array<100xi32>>) {
+  %c1 = arith.constant 1 : index
+  %c100 = arith.constant 100 : index
+  %0 = fir.shape %c100 : (index) -> !fir.shape<1>
+  %c1_i32 = arith.constant 1 : i32
+  fir.do_loop %arg1 = %c1 to %c100 step %c1 unordered {
+    %1 = fir.array_coor %arg0(%0) %arg1 : (!fir.ref<!fir.array<100xi32>>, !fir.shape<1>, index) -> !fir.ref<i32>
+    fir.store %c1_i32 to %1 : !fir.ref<i32>
+  }
+  return
+}
+
+// -----
+
+// CHECK: #[[$ATTR_0:.+]] = #llvm.loop_vectorize<disable = false>
+// CHECK: #[[$ATTR_1:.+]] = #llvm.loop_annotation<vectorize = #[[$ATTR_0]]>
 // CHECK-LABEL: func.func @loop_with_attribute(
 // CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<100xi32>>,
 // CHECK-SAME: %[[ARG1:.*]]: !fir.ref<i32>) {
@@ -167,16 +213,19 @@ func.func @loop_with_final_value(%arg0: !fir.ref<!fir.array<100xi32>>, %arg1: !f
 // CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_3]] : !fir.ref<i32>
 // CHECK: %[[VAL_16:.*]] = arith.addi %[[VAL_15]], %[[VAL_14]] : i32
 // CHECK: fir.store %[[VAL_16]] to %[[VAL_3]] : !fir.ref<i32>
-// CHECK: } {operandSegmentSizes = array<i32: 1, 1, 1, 1, 0>, reduceAttrs = [#fir.reduce_attr<add>]}
+// CHECK: } {loop_annotation = #[[$ATTR_1]]}
 // CHECK: return
 // CHECK: }
+
+#loop_vectorize = #llvm.loop_vectorize<disable = false>
+#loop_annotation = #llvm.loop_annotation<vectorize = #loop_vectorize>
 func.func @loop_with_attribute(%arg0: !fir.ref<!fir.array<100xi32>>, %arg1: !fir.ref<i32>) {
   %c1 = arith.constant 1 : index
   %c0_i32 = arith.constant 0 : i32
   %c100 = arith.constant 100 : index
   %0 = fir.alloca i32
   %1 = fir.shape %c100 : (index) -> !fir.shape<1>
-  fir.do_loop %arg2 = %c1 to %c100 step %c1 reduce(#fir.reduce_attr<add> -> %0 : !fir.ref<i32>) {
+  fir.do_loop %arg2 = %c1 to %c100 step %c1 attributes {loopAnnotation = #loop_annotation} {
     %2 = fir.array_coor %arg0(%1) %arg2 : (!fir.ref<!fir.array<100xi32>>, !fir.shape<1>, index) -> !fir.ref<i32>
     %3 = fir.load %2 : !fir.ref<i32>
    %4 = fir.load %0 : !fir.ref<i32>
@@ -187,6 +236,8 @@
   return
 }
 
+// -----
+
 // CHECK-LABEL: func.func @nested_loop(
 // CHECK-SAME: %[[ARG0:.*]]: !fir.ref<!fir.array<100xi32>>) {
 // CHECK: %[[VAL_0:.*]] = arith.constant 1 : index
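
Usage note: beyond `fir-opt --fir-to-scf='parallel-unordered'` as exercised by the RUN lines above, a downstream pipeline can pull in the same rewrites through the newly exported entry point. The sketch below is illustrative only; the helper `runFIRToSCF` is hypothetical, and only fir::populateFIRToSCFRewrites and mlir::walkAndApplyPatterns come from the patch itself:

    #include "flang/Optimizer/Transforms/Passes.h"
    #include "mlir/IR/PatternMatch.h"
    #include "mlir/Transforms/WalkPatternRewriteDriver.h"

    // Hypothetical helper (not part of this patch): lower FIR control flow
    // to SCF, turning `unordered` fir.do_loop ops into scf.parallel.
    static void runFIRToSCF(mlir::Operation *op) {
      mlir::RewritePatternSet patterns(op->getContext());
      // parallelUnordered=true: fir.do_loop marked `unordered` becomes
      // scf.parallel; all other loops still lower to scf.for.
      fir::populateFIRToSCFRewrites(patterns, /*parallelUnordered=*/true);
      mlir::walkAndApplyPatterns(op, std::move(patterns));
    }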