diff --git a/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp b/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp index c928b76065ade..39d30400a47dc 100644 --- a/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp +++ b/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp @@ -15,7 +15,7 @@ #include "mlir/Analysis/SliceAnalysis.h" #include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "mlir/IR/IRMapping.h" -#include "mlir/Transforms/DialectConversion.h" +#include "mlir/Transforms/GreedyPatternRewriteDriver.h" #include "mlir/Transforms/RegionUtils.h" namespace flangomp { @@ -161,27 +161,34 @@ void collectLoopLocalValues(fir::DoConcurrentLoopOp loop, /// /// \param rewriter - builder used for updating \p allocRegion. static void localizeLoopLocalValue(mlir::Value local, mlir::Region &allocRegion, - mlir::ConversionPatternRewriter &rewriter) { + mlir::PatternRewriter &rewriter) { rewriter.moveOpBefore(local.getDefiningOp(), &allocRegion.front().front()); } } // namespace looputils class DoConcurrentConversion - : public mlir::OpConversionPattern { + : public mlir::OpRewritePattern { public: - using mlir::OpConversionPattern::OpConversionPattern; + using mlir::OpRewritePattern::OpRewritePattern; DoConcurrentConversion( mlir::MLIRContext *context, bool mapToDevice, llvm::DenseSet &concurrentLoopsToSkip, mlir::SymbolTable &moduleSymbolTable) - : OpConversionPattern(context), mapToDevice(mapToDevice), + : OpRewritePattern(context), mapToDevice(mapToDevice), concurrentLoopsToSkip(concurrentLoopsToSkip), moduleSymbolTable(moduleSymbolTable) {} mlir::LogicalResult - matchAndRewrite(fir::DoConcurrentOp doLoop, OpAdaptor adaptor, - mlir::ConversionPatternRewriter &rewriter) const override { + matchAndRewrite(fir::DoConcurrentOp doLoop, + mlir::PatternRewriter &rewriter) const override { + // TODO: This pass should use "walkAndApplyPatterns", but that driver does + // not support pre-order traversals yet. + if (doLoop->getParentOfType()) + return rewriter.notifyMatchFailure( + doLoop, "skipping op to enforce pre-order traversal"); + if (concurrentLoopsToSkip.contains(doLoop)) + return rewriter.notifyMatchFailure(doLoop, "skipping concurrent loop"); if (mapToDevice) return doLoop.emitError( "not yet implemented: Mapping `do concurrent` loops to device"); @@ -231,9 +238,8 @@ class DoConcurrentConversion rewriter.moveOpBefore(op, allocBlock, allocBlock->begin()); } - // Mark `unordered` loops that are not perfectly nested to be skipped from - // the legality check of the `ConversionTarget` since we are not interested - // in mapping them to OpenMP. + // Mark `unordered` loops that are not perfectly nested to be skipped since + // we are not interested in mapping them to OpenMP. ompLoopNest->walk([&](fir::DoConcurrentOp doLoop) { concurrentLoopsToSkip.insert(doLoop); }); @@ -245,7 +251,7 @@ class DoConcurrentConversion private: mlir::omp::ParallelOp - genParallelOp(mlir::Location loc, mlir::ConversionPatternRewriter &rewriter, + genParallelOp(mlir::Location loc, mlir::PatternRewriter &rewriter, looputils::InductionVariableInfos &ivInfos, mlir::IRMapping &mapper) const { auto parallelOp = mlir::omp::ParallelOp::create(rewriter, loc); @@ -256,7 +262,7 @@ class DoConcurrentConversion return parallelOp; } - void genLoopNestIndVarAllocs(mlir::ConversionPatternRewriter &rewriter, + void genLoopNestIndVarAllocs(mlir::PatternRewriter &rewriter, looputils::InductionVariableInfos &ivInfos, mlir::IRMapping &mapper) const { @@ -264,10 +270,9 @@ class DoConcurrentConversion genInductionVariableAlloc(rewriter, indVarInfo.iterVarMemDef, mapper); } - mlir::Operation * - genInductionVariableAlloc(mlir::ConversionPatternRewriter &rewriter, - mlir::Operation *indVarMemDef, - mlir::IRMapping &mapper) const { + mlir::Operation *genInductionVariableAlloc(mlir::PatternRewriter &rewriter, + mlir::Operation *indVarMemDef, + mlir::IRMapping &mapper) const { assert( indVarMemDef != nullptr && "Induction variable memdef is expected to have a defining operation."); @@ -285,8 +290,7 @@ class DoConcurrentConversion } void - genLoopNestClauseOps(mlir::Location loc, - mlir::ConversionPatternRewriter &rewriter, + genLoopNestClauseOps(mlir::Location loc, mlir::PatternRewriter &rewriter, fir::DoConcurrentLoopOp loop, mlir::IRMapping &mapper, mlir::omp::LoopNestOperands &loopNestClauseOps) const { assert(loopNestClauseOps.loopLowerBounds.empty() && @@ -308,8 +312,8 @@ class DoConcurrentConversion } mlir::omp::LoopNestOp - genWsLoopOp(mlir::ConversionPatternRewriter &rewriter, - fir::DoConcurrentLoopOp loop, mlir::IRMapping &mapper, + genWsLoopOp(mlir::PatternRewriter &rewriter, fir::DoConcurrentLoopOp loop, + mlir::IRMapping &mapper, const mlir::omp::LoopNestOperands &clauseOps, bool isComposite) const { mlir::omp::WsloopOperands wsloopClauseOps; @@ -472,18 +476,25 @@ class DoConcurrentConversionPass patterns.insert( context, mapTo == flangomp::DoConcurrentMappingKind::DCMK_Device, concurrentLoopsToSkip, moduleSymbolTable); - mlir::ConversionTarget target(*context); - target.addDynamicallyLegalOp( - [&](fir::DoConcurrentOp op) { - return concurrentLoopsToSkip.contains(op); - }); - target.markUnknownOpDynamicallyLegal( - [](mlir::Operation *) { return true; }); - - if (mlir::failed( - mlir::applyFullConversion(module, target, std::move(patterns)))) { + + // TODO: This pass should use "walkAndApplyPatterns", but that driver does + // not support pre-order traversals yet. + if (mlir::failed(applyPatternsGreedily(module.getOperation(), + std::move(patterns)))) { + module.emitError("failed to apply patterns"); signalPassFailure(); } + + // Make sure that all loops were converted. + mlir::WalkResult status = module->walk([&](fir::DoConcurrentOp op) { + if (concurrentLoopsToSkip.contains(op)) + return mlir::WalkResult::advance(); + + op.emitError("failed to convert operation"); + return mlir::WalkResult::interrupt(); + }); + if (status.wasInterrupted()) + signalPassFailure(); } }; } // namespace diff --git a/flang/test/Transforms/DoConcurrent/basic_device.mlir b/flang/test/Transforms/DoConcurrent/basic_device.mlir index 0ca48943864c8..b88522a6307b8 100644 --- a/flang/test/Transforms/DoConcurrent/basic_device.mlir +++ b/flang/test/Transforms/DoConcurrent/basic_device.mlir @@ -12,7 +12,7 @@ func.func @do_concurrent_basic() attributes {fir.bindc_name = "do_concurrent_bas %c1 = arith.constant 1 : index // expected-error@+2 {{not yet implemented: Mapping `do concurrent` loops to device}} - // expected-error@below {{failed to legalize operation 'fir.do_concurrent'}} + // expected-error@below {{failed to convert operation}} fir.do_concurrent { %0 = fir.alloca i32 {bindc_name = "i"} %1:2 = hlfir.declare %0 {uniq_name = "_QFEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) diff --git a/flang/test/Transforms/DoConcurrent/basic_host.f90 b/flang/test/Transforms/DoConcurrent/basic_host.f90 index 6f24b346e3fb9..252be38ac8fd9 100644 --- a/flang/test/Transforms/DoConcurrent/basic_host.f90 +++ b/flang/test/Transforms/DoConcurrent/basic_host.f90 @@ -7,6 +7,10 @@ ! CHECK-LABEL: DO_CONCURRENT_BASIC program do_concurrent_basic + ! CHECK: %[[C1:.*]] = arith.constant 1 : i32 + ! CHECK: %[[C10:.*]] = arith.constant 10 : i32 + ! CHECK: %[[STEP:.*]] = arith.constant 1 : index + ! CHECK: %[[ARR:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFEa"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) implicit none @@ -15,11 +19,8 @@ program do_concurrent_basic ! CHECK-NOT: fir.do_loop - ! CHECK: %[[C1:.*]] = arith.constant 1 : i32 ! CHECK: %[[LB:.*]] = fir.convert %[[C1]] : (i32) -> index - ! CHECK: %[[C10:.*]] = arith.constant 10 : i32 ! CHECK: %[[UB:.*]] = fir.convert %[[C10]] : (i32) -> index - ! CHECK: %[[STEP:.*]] = arith.constant 1 : index ! CHECK: omp.parallel { diff --git a/flang/test/Transforms/DoConcurrent/basic_host.mlir b/flang/test/Transforms/DoConcurrent/basic_host.mlir index 5425829404d7b..34d5c26c88d25 100644 --- a/flang/test/Transforms/DoConcurrent/basic_host.mlir +++ b/flang/test/Transforms/DoConcurrent/basic_host.mlir @@ -4,6 +4,9 @@ // CHECK-LABEL: func.func @do_concurrent_basic func.func @do_concurrent_basic() attributes {fir.bindc_name = "do_concurrent_basic"} { + // CHECK: %[[C1:.*]] = arith.constant 1 : i32 + // CHECK: %[[C10:.*]] = arith.constant 10 : i32 + // CHECK: %[[STEP:.*]] = arith.constant 1 : index // CHECK: %[[ARR:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFEa"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) %2 = fir.address_of(@_QFEa) : !fir.ref> @@ -18,11 +21,8 @@ func.func @do_concurrent_basic() attributes {fir.bindc_name = "do_concurrent_bas // CHECK-NOT: fir.do_concurrent - // CHECK: %[[C1:.*]] = arith.constant 1 : i32 // CHECK: %[[LB:.*]] = fir.convert %[[C1]] : (i32) -> index - // CHECK: %[[C10:.*]] = arith.constant 10 : i32 // CHECK: %[[UB:.*]] = fir.convert %[[C10]] : (i32) -> index - // CHECK: %[[STEP:.*]] = arith.constant 1 : index // CHECK: omp.parallel { diff --git a/flang/test/Transforms/DoConcurrent/locality_specifiers_simple.mlir b/flang/test/Transforms/DoConcurrent/locality_specifiers_simple.mlir index 160c1df040680..49fa7441ab311 100644 --- a/flang/test/Transforms/DoConcurrent/locality_specifiers_simple.mlir +++ b/flang/test/Transforms/DoConcurrent/locality_specifiers_simple.mlir @@ -33,13 +33,13 @@ func.func @_QPlocal_spec_translation() { // CHECK: omp.private {type = private} @[[PRIVATIZER:.*local_spec_translationElocal_var.*.omp]] : f32 // CHECK: func.func @_QPlocal_spec_translation +// CHECK: %[[C42:.*]] = arith.constant 4.200000e+01 : f32 // CHECK: %[[LOCAL_VAR:.*]] = fir.alloca f32 {bindc_name = "local_var", {{.*}}} // CHECK: %[[LOCAL_VAR_DECL:.*]]:2 = hlfir.declare %[[LOCAL_VAR]] // CHECK: omp.parallel { // CHECK: omp.wsloop private(@[[PRIVATIZER]] %[[LOCAL_VAR_DECL]]#0 -> %[[LOCAL_ARG:.*]] : !fir.ref) { // CHECK: omp.loop_nest {{.*}} { // CHECK: %[[PRIV_DECL:.*]]:2 = hlfir.declare %[[LOCAL_ARG]] -// CHECK: %[[C42:.*]] = arith.constant // CHECK: hlfir.assign %[[C42]] to %[[PRIV_DECL]]#0 // CHECK: omp.yield // CHECK: } diff --git a/flang/test/Transforms/DoConcurrent/multiple_iteration_ranges.f90 b/flang/test/Transforms/DoConcurrent/multiple_iteration_ranges.f90 index d0210726de83e..d40c1892820f1 100644 --- a/flang/test/Transforms/DoConcurrent/multiple_iteration_ranges.f90 +++ b/flang/test/Transforms/DoConcurrent/multiple_iteration_ranges.f90 @@ -20,22 +20,21 @@ program main ! CHECK: func.func @_QQmain ! CHECK: %[[C3:.*]] = arith.constant 3 : i32 -! CHECK: %[[LB_I:.*]] = fir.convert %[[C3]] : (i32) -> index ! CHECK: %[[C20:.*]] = arith.constant 20 : i32 +! CHECK: %[[C1:.*]] = arith.constant 1 : index +! CHECK: %[[C5:.*]] = arith.constant 5 : i32 +! CHECK: %[[C40:.*]] = arith.constant 40 : i32 +! CHECK: %[[C7:.*]] = arith.constant 7 : i32 +! CHECK: %[[C60:.*]] = arith.constant 60 : i32 + +! CHECK: %[[LB_I:.*]] = fir.convert %[[C3]] : (i32) -> index ! CHECK: %[[UB_I:.*]] = fir.convert %[[C20]] : (i32) -> index -! CHECK: %[[STEP_I:.*]] = arith.constant 1 : index -! CHECK: %[[C5:.*]] = arith.constant 5 : i32 ! CHECK: %[[LB_J:.*]] = fir.convert %[[C5]] : (i32) -> index -! CHECK: %[[C40:.*]] = arith.constant 40 : i32 ! CHECK: %[[UB_J:.*]] = fir.convert %[[C40]] : (i32) -> index -! CHECK: %[[STEP_J:.*]] = arith.constant 1 : index -! CHECK: %[[C7:.*]] = arith.constant 7 : i32 ! CHECK: %[[LB_K:.*]] = fir.convert %[[C7]] : (i32) -> index -! CHECK: %[[C60:.*]] = arith.constant 60 : i32 ! CHECK: %[[UB_K:.*]] = fir.convert %[[C60]] : (i32) -> index -! CHECK: %[[STEP_K:.*]] = arith.constant 1 : index ! CHECK: omp.parallel { @@ -53,7 +52,7 @@ program main ! CHECK-SAME: (%[[ARG0:[^[:space:]]+]], %[[ARG1:[^[:space:]]+]], %[[ARG2:[^[:space:]]+]]) ! CHECK-SAME: : index = (%[[LB_I]], %[[LB_J]], %[[LB_K]]) ! CHECK-SAME: to (%[[UB_I]], %[[UB_J]], %[[UB_K]]) inclusive -! CHECK-SAME: step (%[[STEP_I]], %[[STEP_J]], %[[STEP_K]]) { +! CHECK-SAME: step (%[[C1]], %[[C1]], %[[C1]]) { ! CHECK-NEXT: %[[IV_IDX_I:.*]] = fir.convert %[[ARG0]] ! CHECK-NEXT: fir.store %[[IV_IDX_I]] to %[[BINDING_I]]#0 diff --git a/flang/test/Transforms/DoConcurrent/non_const_bounds.f90 b/flang/test/Transforms/DoConcurrent/non_const_bounds.f90 index cd1bd4f98a3f5..316ef18ca836f 100644 --- a/flang/test/Transforms/DoConcurrent/non_const_bounds.f90 +++ b/flang/test/Transforms/DoConcurrent/non_const_bounds.f90 @@ -20,6 +20,8 @@ subroutine foo(n) end program main +! CHECK: %[[C1:.*]] = arith.constant 1 : index + ! CHECK: %[[N_DECL:.*]]:2 = hlfir.declare %{{.*}} dummy_scope %{{.*}} {uniq_name = "_QFFfooEn"} ! CHECK: fir.load @@ -27,7 +29,6 @@ end program main ! CHECK: %[[LB:.*]] = fir.convert %{{c1_.*}} : (i32) -> index ! CHECK: %[[N_VAL:.*]] = fir.load %[[N_DECL]]#0 : !fir.ref ! CHECK: %[[UB:.*]] = fir.convert %[[N_VAL]] : (i32) -> index -! CHECK: %[[C1:.*]] = arith.constant 1 : index ! CHECK: omp.parallel { diff --git a/flang/test/Transforms/DoConcurrent/reduce_add.mlir b/flang/test/Transforms/DoConcurrent/reduce_add.mlir index 1ea3e3e527335..bf9ce75e6c978 100644 --- a/flang/test/Transforms/DoConcurrent/reduce_add.mlir +++ b/flang/test/Transforms/DoConcurrent/reduce_add.mlir @@ -44,11 +44,12 @@ func.func @_QPdo_concurrent_reduce() { // CHECK: } // CHECK-LABEL: func.func @_QPdo_concurrent_reduce() { +// CHECK: %[[VAL_4:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 // CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i"} // CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFdo_concurrent_reduceEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) // CHECK: %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "s", uniq_name = "_QFdo_concurrent_reduceEs"} // CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFdo_concurrent_reduceEs"} : (!fir.ref) -> (!fir.ref, !fir.ref) -// CHECK: %[[VAL_4:.*]] = arith.constant 1 : index // CHECK: omp.parallel { // CHECK: %[[VAL_5:.*]] = fir.alloca i32 {bindc_name = "i"} // CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFdo_concurrent_reduceEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) @@ -59,7 +60,6 @@ func.func @_QPdo_concurrent_reduce() { // CHECK: fir.store %[[VAL_9]] to %[[VAL_6]]#0 : !fir.ref // CHECK: %[[VAL_10:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFdo_concurrent_reduceEs"} : (!fir.ref) -> (!fir.ref, !fir.ref) // CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_10]]#0 : !fir.ref -// CHECK: %[[VAL_12:.*]] = arith.constant 1 : i32 // CHECK: %[[VAL_13:.*]] = arith.addi %[[VAL_11]], %[[VAL_12]] : i32 // CHECK: hlfir.assign %[[VAL_13]] to %[[VAL_10]]#0 : i32, !fir.ref // CHECK: omp.yield diff --git a/flang/test/Transforms/DoConcurrent/reduce_all_regions.mlir b/flang/test/Transforms/DoConcurrent/reduce_all_regions.mlir index 3d5b8bf22af75..815c3cfdd1a24 100644 --- a/flang/test/Transforms/DoConcurrent/reduce_all_regions.mlir +++ b/flang/test/Transforms/DoConcurrent/reduce_all_regions.mlir @@ -49,11 +49,11 @@ func.func @_QPdo_concurrent_reduce() { // CHECK: } // CHECK-LABEL: func.func @_QPdo_concurrent_reduce() { +// CHECK: %[[VAL_4:.*]] = arith.constant 1 : index // CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i"} // CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFdo_concurrent_reduceEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) // CHECK: %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "s", uniq_name = "_QFdo_concurrent_reduceEs"} // CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFdo_concurrent_reduceEs"} : (!fir.ref) -> (!fir.ref, !fir.ref) -// CHECK: %[[VAL_4:.*]] = arith.constant 1 : index // CHECK: omp.parallel { // CHECK: %[[VAL_5:.*]] = fir.alloca i32 {bindc_name = "i"} // CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFdo_concurrent_reduceEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) diff --git a/flang/test/Transforms/DoConcurrent/reduce_local.mlir b/flang/test/Transforms/DoConcurrent/reduce_local.mlir index 0f667109e6e83..588d95f957a7d 100644 --- a/flang/test/Transforms/DoConcurrent/reduce_local.mlir +++ b/flang/test/Transforms/DoConcurrent/reduce_local.mlir @@ -51,13 +51,14 @@ fir.declare_reduction @add_reduction_i32 : i32 init { // CHECK: omp.private {type = private} @_QFdo_concurrent_reduceEl_private_i32.omp : i32 // CHECK-LABEL: func.func @_QPdo_concurrent_reduce() { +// CHECK: %[[VAL_6:.*]] = arith.constant 1 : index +// CHECK: %[[VAL_15:.*]] = arith.constant 1 : i32 // CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "i"} // CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFdo_concurrent_reduceEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) // CHECK: %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "l", uniq_name = "_QFdo_concurrent_reduceEl"} // CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFdo_concurrent_reduceEl"} : (!fir.ref) -> (!fir.ref, !fir.ref) // CHECK: %[[VAL_4:.*]] = fir.alloca i32 {bindc_name = "s", uniq_name = "_QFdo_concurrent_reduceEs"} // CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {uniq_name = "_QFdo_concurrent_reduceEs"} : (!fir.ref) -> (!fir.ref, !fir.ref) -// CHECK: %[[VAL_6:.*]] = arith.constant 1 : index // CHECK: omp.parallel { // CHECK: %[[VAL_7:.*]] = fir.alloca i32 {bindc_name = "i"} // CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_7]] {uniq_name = "_QFdo_concurrent_reduceEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) @@ -67,7 +68,6 @@ fir.declare_reduction @add_reduction_i32 : i32 init { // CHECK: fir.store %[[VAL_12]] to %[[VAL_8]]#0 : !fir.ref // CHECK: %[[VAL_13:.*]]:2 = hlfir.declare %[[VAL_9]] {uniq_name = "_QFdo_concurrent_reduceEl"} : (!fir.ref) -> (!fir.ref, !fir.ref) // CHECK: %[[VAL_14:.*]]:2 = hlfir.declare %[[VAL_10]] {uniq_name = "_QFdo_concurrent_reduceEs"} : (!fir.ref) -> (!fir.ref, !fir.ref) -// CHECK: %[[VAL_15:.*]] = arith.constant 1 : i32 // CHECK: hlfir.assign %[[VAL_15]] to %[[VAL_13]]#0 : i32, !fir.ref // CHECK: %[[VAL_16:.*]] = fir.load %[[VAL_14]]#0 : !fir.ref // CHECK: %[[VAL_17:.*]] = fir.load %[[VAL_13]]#0 : !fir.ref