diff --git a/flang/include/flang/Lower/OpenMP.h b/flang/include/flang/Lower/OpenMP.h index 872b7d50c3e7a..d2b7a423d7781 100644 --- a/flang/include/flang/Lower/OpenMP.h +++ b/flang/include/flang/Lower/OpenMP.h @@ -50,8 +50,8 @@ struct Variable; } // namespace pft // Generate the OpenMP terminator for Operation at Location. -void genOpenMPTerminator(fir::FirOpBuilder &, mlir::Operation *, - mlir::Location); +mlir::Operation *genOpenMPTerminator(fir::FirOpBuilder &, mlir::Operation *, + mlir::Location); void genOpenMPConstruct(AbstractConverter &, Fortran::lower::SymMap &, semantics::SemanticsContext &, pft::Evaluation &, diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp index f269155e45194..6a40bd54a2f3b 100644 --- a/flang/lib/Lower/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP.cpp @@ -27,6 +27,7 @@ #include "flang/Parser/parse-tree.h" #include "flang/Semantics/openmp-directive-sets.h" #include "flang/Semantics/tools.h" +#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h" #include "mlir/Dialect/OpenMP/OpenMPDialect.h" #include "mlir/Dialect/SCF/IR/SCF.h" #include "mlir/Transforms/RegionUtils.h" @@ -385,7 +386,8 @@ void DataSharingProcessor::insertLastPrivateCompare(mlir::Operation *op) { // construct mlir::OpBuilder::InsertPoint unstructuredSectionsIP = firOpBuilder.saveInsertionPoint(); - firOpBuilder.setInsertionPointToStart(&op->getRegion(0).back()); + mlir::Operation *lastOper = op->getRegion(0).back().getTerminator(); + firOpBuilder.setInsertionPoint(lastOper); lastPrivIP = firOpBuilder.saveInsertionPoint(); firOpBuilder.restoreInsertionPoint(unstructuredSectionsIP); } @@ -2135,15 +2137,6 @@ static mlir::Type getLoopVarType(Fortran::lower::AbstractConverter &converter, return converter.getFirOpBuilder().getIntegerType(loopVarTypeSize); } -static void resetBeforeTerminator(fir::FirOpBuilder &firOpBuilder, - mlir::Operation *storeOp, - mlir::Block &block) { - if (storeOp) - firOpBuilder.setInsertionPointAfter(storeOp); - else - firOpBuilder.setInsertionPointToStart(&block); -} - static mlir::Operation * createAndSetPrivatizedLoopVar(Fortran::lower::AbstractConverter &converter, mlir::Location loc, mlir::Value indexVal, @@ -2186,11 +2179,17 @@ static void createBodyOfOp( const llvm::SmallVector &args = {}, bool outerCombined = false, DataSharingProcessor *dsp = nullptr) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + + auto insertMarker = [](fir::FirOpBuilder &builder) { + mlir::Value undef = builder.create(builder.getUnknownLoc(), + builder.getIndexType()); + return undef.getDefiningOp(); + }; + // If an argument for the region is provided then create the block with that // argument. Also update the symbol's address with the mlir argument value. // e.g. For loops the argument is the induction variable. And all further // uses of the induction variable should use this mlir value. - mlir::Operation *storeOp = nullptr; if (args.size()) { std::size_t loopVarTypeSize = 0; for (const Fortran::semantics::Symbol *arg : args) @@ -2201,20 +2200,20 @@ static void createBodyOfOp( firOpBuilder.createBlock(&op.getRegion(), {}, tiv, locs); // The argument is not currently in memory, so make a temporary for the // argument, and store it there, then bind that location to the argument. + mlir::Operation *storeOp = nullptr; for (auto [argIndex, argSymbol] : llvm::enumerate(args)) { mlir::Value indexVal = fir::getBase(op.getRegion().front().getArgument(argIndex)); storeOp = createAndSetPrivatizedLoopVar(converter, loc, indexVal, argSymbol); } + firOpBuilder.setInsertionPointAfter(storeOp); } else { firOpBuilder.createBlock(&op.getRegion()); } - // Set the insert for the terminator operation to go at the end of the - // block - this is either empty or the block with the stores above, - // the end of the block works for both. - mlir::Block &block = op.getRegion().back(); - firOpBuilder.setInsertionPointToEnd(&block); + + // Mark the earliest insertion point. + mlir::Operation *marker = insertMarker(firOpBuilder); // If it is an unstructured region and is not the outer region of a combined // construct, create empty blocks for all evaluations. @@ -2223,37 +2222,100 @@ static void createBodyOfOp( mlir::omp::YieldOp>( firOpBuilder, eval.getNestedEvaluations()); - // Insert the terminator. - Fortran::lower::genOpenMPTerminator(firOpBuilder, op.getOperation(), loc); - // Reset the insert point to before the terminator. - resetBeforeTerminator(firOpBuilder, storeOp, block); + // Start with privatization, so that the lowering of the nested + // code will use the right symbols. + constexpr bool isLoop = std::is_same_v || + std::is_same_v; + bool privatize = clauses && !outerCombined; - // Handle privatization. Do not privatize if this is the outer operation. - if (clauses && !outerCombined) { - constexpr bool isLoop = std::is_same_v || - std::is_same_v; + firOpBuilder.setInsertionPoint(marker); + std::optional tempDsp; + if (privatize) { if (!dsp) { - DataSharingProcessor proc(converter, *clauses, eval); - proc.processStep1(); - proc.processStep2(op, isLoop); - } else { - if (isLoop && args.size() > 0) - dsp->setLoopIV(converter.getSymbolAddress(*args[0])); - dsp->processStep2(op, isLoop); + tempDsp.emplace(converter, *clauses, eval); + tempDsp->processStep1(); } - - if (storeOp) - firOpBuilder.setInsertionPointAfter(storeOp); } if constexpr (std::is_same_v) { threadPrivatizeVars(converter, eval); - if (clauses) + if (clauses) { + firOpBuilder.setInsertionPoint(marker); ClauseProcessor(converter, *clauses).processCopyin(); + } } - if (genNested) + if (genNested) { + // genFIR(Evaluation&) tries to patch up unterminated blocks, causing + // a lot of trouble if the terminator generation is delayed past this + // point. Insert a temporary terminator here, then delete it. + firOpBuilder.setInsertionPointToEnd(&op.getRegion().back()); + auto *temp = Fortran::lower::genOpenMPTerminator(firOpBuilder, + op.getOperation(), loc); + firOpBuilder.setInsertionPointAfter(marker); genNestedEvaluations(converter, eval); + temp->erase(); + } + + // Get or create a unique exiting block from the given region, or + // return nullptr if there is no exiting block. + auto getUniqueExit = [&](mlir::Region ®ion) -> mlir::Block * { + // Find the blocks where the OMP terminator should go. In simple cases + // it is the single block in the operation's region. When the region + // is more complicated, especially with unstructured control flow, there + // may be multiple blocks, and some of them may have non-OMP terminators + // resulting from lowering of the code contained within the operation. + // All the remaining blocks are potential exit points from the op's region. + // + // Explicit control flow cannot exit any OpenMP region (other than via + // STOP), and that is enforced by semantic checks prior to lowering. STOP + // statements are lowered to a function call. + + // Collect unterminated blocks. + llvm::SmallVector exits; + for (mlir::Block &b : region) { + if (b.empty() || !b.back().hasTrait()) + exits.push_back(&b); + } + + if (exits.empty()) + return nullptr; + // If there already is a unique exiting block, do not create another one. + // Additionally, some ops (e.g. omp.sections) require only 1 block in + // its region. + if (exits.size() == 1) + return exits[0]; + mlir::Block *exit = firOpBuilder.createBlock(®ion); + for (mlir::Block *b : exits) { + firOpBuilder.setInsertionPointToEnd(b); + firOpBuilder.create(loc, exit); + } + return exit; + }; + + if (auto *exitBlock = getUniqueExit(op.getRegion())) { + firOpBuilder.setInsertionPointToEnd(exitBlock); + auto *term = Fortran::lower::genOpenMPTerminator(firOpBuilder, + op.getOperation(), loc); + // Only insert lastprivate code when there actually is an exit block. + // Such a block may not exist if the nested code produced an infinite + // loop (this may not make sense in production code, but a user could + // write that and we should handle it). + firOpBuilder.setInsertionPoint(term); + if (privatize) { + if (!dsp) { + assert(tempDsp.has_value()); + tempDsp->processStep2(op, isLoop); + } else { + if (isLoop && args.size() > 0) + dsp->setLoopIV(converter.getSymbolAddress(*args[0])); + dsp->processStep2(op, isLoop); + } + } + } + + firOpBuilder.setInsertionPointAfter(marker); + marker->erase(); } static void genBodyOfTargetDataOp( @@ -3685,14 +3747,14 @@ genOMP(Fortran::lower::AbstractConverter &converter, // Public functions //===----------------------------------------------------------------------===// -void Fortran::lower::genOpenMPTerminator(fir::FirOpBuilder &builder, - mlir::Operation *op, - mlir::Location loc) { +mlir::Operation *Fortran::lower::genOpenMPTerminator(fir::FirOpBuilder &builder, + mlir::Operation *op, + mlir::Location loc) { if (mlir::isa(op)) - builder.create(loc); + return builder.create(loc); else - builder.create(loc); + return builder.create(loc); } void Fortran::lower::genOpenMPConstruct( diff --git a/flang/test/Lower/OpenMP/FIR/sections.f90 b/flang/test/Lower/OpenMP/FIR/sections.f90 index 87d34e58321be..640ec34a05bc2 100644 --- a/flang/test/Lower/OpenMP/FIR/sections.f90 +++ b/flang/test/Lower/OpenMP/FIR/sections.f90 @@ -126,11 +126,11 @@ subroutine lastprivate() x = x * 10 !CHECK: omp.section { !CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFlastprivateEx"} -!CHECK: %[[true:.*]] = arith.constant true !CHECK: %[[temp:.*]] = fir.load %[[PRIVATE_X]] : !fir.ref !CHECK: %[[const:.*]] = arith.constant 1 : i32 !CHECK: %[[result:.*]] = arith.addi %[[temp]], %[[const]] : i32 !CHECK: fir.store %[[result]] to %[[PRIVATE_X]] : !fir.ref +!CHECK: %[[true:.*]] = arith.constant true !CHECK: fir.if %[[true]] { !CHECK: %[[temp:.*]] = fir.load %[[PRIVATE_X]] : !fir.ref !CHECK: fir.store %[[temp]] to %[[X]] : !fir.ref @@ -163,11 +163,11 @@ subroutine lastprivate() !CHECK: %[[temp:.*]] = fir.load %[[X]] : !fir.ref !CHECK: fir.store %[[temp]] to %[[PRIVATE_X]] : !fir.ref !CHECK: omp.barrier -!CHECK: %[[true:.*]] = arith.constant true !CHECK: %[[temp:.*]] = fir.load %[[PRIVATE_X]] : !fir.ref !CHECK: %[[const:.*]] = arith.constant 1 : i32 !CHECK: %[[result:.*]] = arith.addi %[[temp]], %[[const]] : i32 !CHECK: fir.store %[[result]] to %[[PRIVATE_X]] : !fir.ref +!CHECK: %[[true:.*]] = arith.constant true !CHECK: fir.if %true { !CHECK: %[[temp:.*]] = fir.load %[[PRIVATE_X]] : !fir.ref !CHECK: fir.store %[[temp]] to %[[X]] : !fir.ref @@ -200,11 +200,11 @@ subroutine lastprivate() !CHECK: %[[temp:.*]] = fir.load %[[X]] : !fir.ref !CHECK: fir.store %[[temp]] to %[[PRIVATE_X]] : !fir.ref !CHECK: omp.barrier -!CHECK: %[[true:.*]] = arith.constant true !CHECK: %[[temp:.*]] = fir.load %[[PRIVATE_X]] : !fir.ref !CHECK: %[[const:.*]] = arith.constant 1 : i32 !CHECK: %[[result:.*]] = arith.addi %[[temp]], %[[const]] : i32 !CHECK: fir.store %[[result]] to %[[PRIVATE_X]] : !fir.ref +!CHECK: %[[true:.*]] = arith.constant true !CHECK: fir.if %true { !CHECK: %[[temp:.*]] = fir.load %[[PRIVATE_X]] : !fir.ref !CHECK: fir.store %[[temp]] to %[[X]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/infinite-loop-in-construct.f90 b/flang/test/Lower/OpenMP/infinite-loop-in-construct.f90 new file mode 100644 index 0000000000000..16b400a231860 --- /dev/null +++ b/flang/test/Lower/OpenMP/infinite-loop-in-construct.f90 @@ -0,0 +1,26 @@ +! RUN: bbc -fopenmp -o - %s | FileCheck %s + +! Check that this test can be lowered successfully. +! See https://github.com/llvm/llvm-project/issues/74348 + +! CHECK-LABEL: func.func @_QPsb +! CHECK: omp.parallel +! CHECK: cf.cond_br %{{[0-9]+}}, ^bb1, ^bb2 +! CHECK-NEXT: ^bb1: // pred: ^bb0 +! CHECK: cf.br ^bb2 +! CHECK-NEXT: ^bb2: // 3 preds: ^bb0, ^bb1, ^bb2 +! CHECK-NEXT: cf.br ^bb2 +! CHECK-NEXT: } + +subroutine sb(ninter, numnod) + integer :: ninter, numnod + integer, dimension(:), allocatable :: indx_nm + + !$omp parallel + if (ninter>0) then + allocate(indx_nm(numnod)) + endif + 220 continue + goto 220 + !$omp end parallel +end subroutine diff --git a/flang/test/Lower/OpenMP/sections.f90 b/flang/test/Lower/OpenMP/sections.f90 index 16426d070d9a9..6bad688058d28 100644 --- a/flang/test/Lower/OpenMP/sections.f90 +++ b/flang/test/Lower/OpenMP/sections.f90 @@ -141,11 +141,11 @@ subroutine lastprivate() !CHECK: omp.section { !CHECK: %[[PRIVATE_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned, uniq_name = "_QFlastprivateEx"} !CHECK: %[[PRIVATE_X_DECL:.*]]:2 = hlfir.declare %[[PRIVATE_X]] {uniq_name = "_QFlastprivateEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) -!CHECK: %[[TRUE:.*]] = arith.constant true !CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref !CHECK: %[[CONST:.*]] = arith.constant 1 : i32 !CHECK: %[[RESULT:.*]] = arith.addi %[[TEMP]], %[[CONST]] : i32 !CHECK: hlfir.assign %[[RESULT]] to %[[PRIVATE_X_DECL]]#0 : i32, !fir.ref +!CHECK: %[[TRUE:.*]] = arith.constant true !CHECK: fir.if %[[TRUE]] { !CHECK: %[[TEMP1:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref !CHECK: hlfir.assign %[[TEMP1]] to %[[X_DECL]]#0 temporary_lhs : i32, !fir.ref @@ -180,11 +180,11 @@ subroutine lastprivate() !CHECK: %[[TEMP:.*]] = fir.load %[[X_DECL]]#0 : !fir.ref !CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_X_DECL]]#0 temporary_lhs : i32, !fir.ref !CHECK: omp.barrier -!CHECK: %[[TRUE:.*]] = arith.constant true !CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref !CHECK: %[[CONST:.*]] = arith.constant 1 : i32 !CHECK: %[[RESULT:.*]] = arith.addi %[[TEMP]], %[[CONST]] : i32 !CHECK: hlfir.assign %[[RESULT]] to %[[PRIVATE_X_DECL]]#0 : i32, !fir.ref +!CHECK: %[[TRUE:.*]] = arith.constant true !CHECK: fir.if %[[TRUE]] { !CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref !CHECK: hlfir.assign %[[TEMP]] to %[[X_DECL]]#0 temporary_lhs : i32, !fir.ref @@ -219,11 +219,11 @@ subroutine lastprivate() !CHECK: %[[TEMP:.*]] = fir.load %[[X_DECL]]#0 : !fir.ref !CHECK: hlfir.assign %[[TEMP]] to %[[PRIVATE_X_DECL]]#0 temporary_lhs : i32, !fir.ref !CHECK: omp.barrier -!CHECK: %[[TRUE:.*]] = arith.constant true !CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref !CHECK: %[[CONST:.*]] = arith.constant 1 : i32 !CHECK: %[[RESULT:.*]] = arith.addi %[[TEMP]], %[[CONST]] : i32 !CHECK: hlfir.assign %[[RESULT]] to %[[PRIVATE_X_DECL]]#0 : i32, !fir.ref +!CHECK: %[[TRUE:.*]] = arith.constant true !CHECK: fir.if %[[TRUE]] { !CHECK: %[[TEMP:.*]] = fir.load %[[PRIVATE_X_DECL]]#0 : !fir.ref !CHECK: hlfir.assign %[[TEMP]] to %[[X_DECL]]#0 temporary_lhs : i32, !fir.ref diff --git a/flang/test/Lower/OpenMP/wsloop-unstructured.f90 b/flang/test/Lower/OpenMP/wsloop-unstructured.f90 new file mode 100644 index 0000000000000..7fe63a1fe607c --- /dev/null +++ b/flang/test/Lower/OpenMP/wsloop-unstructured.f90 @@ -0,0 +1,61 @@ +! RUN: bbc -emit-hlfir -fopenmp -o - %s | FileCheck %s + +subroutine sub(imax, jmax, x, y) + integer, intent(in) :: imax, jmax + real, intent(in), dimension(1:imax, 1:jmax) :: x, y + + integer :: i, j, ii + + ! collapse(2) is needed to reproduce the issue + !$omp parallel do collapse(2) + do j = 1, jmax + do i = 1, imax + do ii = 1, imax ! note that this loop is not collapsed + if (x(i,j) < y(ii,j)) then + ! exit needed to force unstructured control flow + exit + endif + enddo + enddo + enddo +end subroutine sub + +! this is testing that we don't crash generating code for this: in particular +! that all blocks are terminated + +! CHECK-LABEL: func.func @_QPsub( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref {fir.bindc_name = "imax"}, +! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref {fir.bindc_name = "jmax"}, +! CHECK-SAME: %[[VAL_2:.*]]: !fir.ref> {fir.bindc_name = "x"}, +! CHECK-SAME: %[[VAL_3:.*]]: !fir.ref> {fir.bindc_name = "y"}) { +! [...] +! CHECK: omp.wsloop for (%[[VAL_53:.*]], %[[VAL_54:.*]]) : i32 = ({{.*}}) to ({{.*}}) inclusive step ({{.*}}) { +! [...] +! CHECK: cf.br ^bb1 +! CHECK: ^bb1: +! CHECK: cf.br ^bb2 +! CHECK: ^bb2: +! [...] +! CHECK: cf.br ^bb3 +! CHECK: ^bb3: +! [...] +! CHECK: %[[VAL_63:.*]] = arith.cmpi sgt, %{{.*}}, %{{.*}} : i32 +! CHECK: cf.cond_br %[[VAL_63]], ^bb4, ^bb7 +! CHECK: ^bb4: +! [...] +! CHECK: %[[VAL_76:.*]] = arith.cmpf olt, %{{.*}}, %{{.*}} fastmath : f32 +! CHECK: cf.cond_br %[[VAL_76]], ^bb5, ^bb6 +! CHECK: ^bb5: +! CHECK: cf.br ^bb7 +! CHECK: ^bb6: +! [...] +! CHECK: cf.br ^bb3 +! CHECK: ^bb7: +! CHECK: omp.yield +! CHECK: } +! CHECK: omp.terminator +! CHECK: } +! CHECK: cf.br ^bb1 +! CHECK: ^bb1: +! CHECK: return +! CHECK: }