diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index 4a8e84e0ece303..e23f89ae642f33 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -612,23 +612,6 @@ class FirConverter : public Fortran::lower::AbstractConverter { return fir::substBase(hexv, temp); }); - // Replace all uses of the original with the clone/copy, - // esepcially for loop bounds (that uses the variable being privatised) - // since loop bounds use old values that need to be fixed by using the - // new copied value. - // Not able to use replaceAllUsesWith() because uses outside - // the loop body should not use the clone. - // FIXME: Call privatization before the loop operation. - mlir::Region &curRegion = getFirOpBuilder().getRegion(); - mlir::Value oldVal = fir::getBase(hexv); - mlir::Value cloneVal = fir::getBase(exv); - for (auto &oper : curRegion.getOps()) { - for (unsigned int ii = 0; ii < oper.getNumOperands(); ++ii) { - if (oper.getOperand(ii) == oldVal) { - oper.setOperand(ii, cloneVal); - } - } - } return bindIfNewSymbol(sym, exv); } diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp index 701504394ca973..13b7c5891b7271 100644 --- a/flang/lib/Lower/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP.cpp @@ -69,15 +69,11 @@ class DataSharingProcessor { llvm::SetVector defaultSymbols; llvm::SetVector symbolsInNestedRegions; llvm::SetVector symbolsInParentRegions; - mlir::Operation *op; Fortran::lower::AbstractConverter &converter; fir::FirOpBuilder &firOpBuilder; const Fortran::parser::OmpClauseList &opClauseList; Fortran::lower::pft::Evaluation &eval; - void privatizeSymbol( - const Fortran::semantics::Symbol *sym, - [[maybe_unused]] mlir::OpBuilder::InsertPoint *lastPrivIP = nullptr); bool needBarrier(); void collectSymbols(Fortran::semantics::Symbol::Flag flag); void collectOmpObjectListSymbol( @@ -88,40 +84,57 @@ class DataSharingProcessor { void collectDefaultSymbols(); void privatize(); void defaultPrivatize(); + void copyLastPrivatize(mlir::Operation *op); void insertLastPrivateCompare(mlir::Operation *op); + void cloneSymbol(const Fortran::semantics::Symbol *sym); + void copyFirstPrivateSymbol(const Fortran::semantics::Symbol *sym); + void copyLastPrivateSymbol(const Fortran::semantics::Symbol *sym, + mlir::OpBuilder::InsertPoint *lastPrivIP); public: - DataSharingProcessor(mlir::Operation *op, - Fortran::lower::AbstractConverter &converter, + DataSharingProcessor(Fortran::lower::AbstractConverter &converter, const Fortran::parser::OmpClauseList &opClauseList, Fortran::lower::pft::Evaluation &eval) - : hasLastPrivateOp(false), op(op), converter(converter), + : hasLastPrivateOp(false), converter(converter), firOpBuilder(converter.getFirOpBuilder()), opClauseList(opClauseList), eval(eval) {} - bool process(); + // Privatisation is split into two steps. + // Step1 performs cloning of all privatisation clauses and copying for + // firstprivates. Step1 is performed at the place where process/processStep1 + // is called. This is usually inside the Operation corresponding to the OpenMP + // construct, for looping constructs this is just before the Operation. The + // split into two steps was performed basically to be able to call + // privatisation for looping constructs before the operation is created since + // the bounds of the MLIR OpenMP operation can be privatised. Step2 performs + // the copying for lastprivates. Step2 requires knowledge of the MLIR + // operation to insert the last private update. + bool process(mlir::Operation *op); + void processStep1(); + bool processStep2(mlir::Operation *op); }; -bool DataSharingProcessor::process() { - insPt = firOpBuilder.saveInsertionPoint(); +bool DataSharingProcessor::process(mlir::Operation *op) { + processStep1(); + assert(op && "Current MLIR operation not set"); + return processStep2(op); +} + +void DataSharingProcessor::processStep1() { collectSymbolsForPrivatization(); - insertLastPrivateCompare(op); - if (mlir::isa(op)) { - if (!eval.lowerAsUnstructured()) - firOpBuilder.setInsertionPointToStart(&op->getRegion(0).back()); - } else { - firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock()); - } - privatize(); collectDefaultSymbols(); + privatize(); defaultPrivatize(); insertBarrier(); +} + +bool DataSharingProcessor::processStep2(mlir::Operation *op) { + insPt = firOpBuilder.saveInsertionPoint(); + copyLastPrivatize(op); firOpBuilder.restoreInsertionPoint(insPt); return hasLastPrivateOp; } -void DataSharingProcessor::privatizeSymbol( - const Fortran::semantics::Symbol *sym, - [[maybe_unused]] mlir::OpBuilder::InsertPoint *lastPrivIP) { +void DataSharingProcessor::cloneSymbol(const Fortran::semantics::Symbol *sym) { // Privatization for symbols which are pre-determined (like loop index // variables) happen separately, for everything else privatize here. if (sym->test(Fortran::semantics::Symbol::Flag::OmpPreDetermined)) @@ -129,18 +142,17 @@ void DataSharingProcessor::privatizeSymbol( bool success = converter.createHostAssociateVarClone(*sym); (void)success; assert(success && "Privatization failed due to existing binding"); - if (sym->test(Fortran::semantics::Symbol::Flag::OmpFirstPrivate)) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - mlir::OpBuilder::InsertPoint firstPrivIP, insPt; - if (mlir::isa(op)) { - insPt = firOpBuilder.saveInsertionPoint(); - firOpBuilder.setInsertionPointToStart(&op->getRegion(0).front()); - firstPrivIP = firOpBuilder.saveInsertionPoint(); - } - converter.copyHostAssociateVar(*sym, &firstPrivIP); - if (mlir::isa(op)) - firOpBuilder.restoreInsertionPoint(insPt); - } +} + +void DataSharingProcessor::copyFirstPrivateSymbol( + const Fortran::semantics::Symbol *sym) { + if (sym->test(Fortran::semantics::Symbol::Flag::OmpFirstPrivate)) + converter.copyHostAssociateVar(*sym); +} + +void DataSharingProcessor::copyLastPrivateSymbol( + const Fortran::semantics::Symbol *sym, + [[maybe_unused]] mlir::OpBuilder::InsertPoint *lastPrivIP) { if (sym->test(Fortran::semantics::Symbol::Flag::OmpLastPrivate)) converter.copyHostAssociateVar(*sym, lastPrivIP); } @@ -208,6 +220,7 @@ void DataSharingProcessor ::insertBarrier() { void DataSharingProcessor::insertLastPrivateCompare(mlir::Operation *op) { mlir::arith::CmpIOp cmpOp; bool cmpCreated = false; + mlir::OpBuilder::InsertPoint localInsPt = firOpBuilder.saveInsertionPoint(); for (const Fortran::parser::OmpClause &clause : opClauseList.v) { if (std::get_if(&clause.u)) { // TODO: Add lastprivate support for simd construct @@ -320,6 +333,7 @@ void DataSharingProcessor::insertLastPrivateCompare(mlir::Operation *op) { } } } + firOpBuilder.restoreInsertionPoint(localInsPt); } void DataSharingProcessor::collectSymbols( @@ -354,16 +368,27 @@ void DataSharingProcessor::collectDefaultSymbols() { } void DataSharingProcessor::privatize() { + for (auto sym : privatizedSymbols) { + cloneSymbol(sym); + copyFirstPrivateSymbol(sym); + } +} + +void DataSharingProcessor::copyLastPrivatize(mlir::Operation *op) { + insertLastPrivateCompare(op); for (auto sym : privatizedSymbols) - privatizeSymbol(sym, &lastPrivIP); + copyLastPrivateSymbol(sym, &lastPrivIP); } void DataSharingProcessor::defaultPrivatize() { - for (auto sym : defaultSymbols) + for (auto sym : defaultSymbols) { if (!symbolsInNestedRegions.contains(sym) && !symbolsInParentRegions.contains(sym) && - !privatizedSymbols.contains(sym)) - privatizeSymbol(sym); + !privatizedSymbols.contains(sym)) { + cloneSymbol(sym); + copyFirstPrivateSymbol(sym); + } + } } /// The COMMON block is a global structure. \p commonValue is the base address @@ -604,7 +629,8 @@ createBodyOfOp(Op &op, Fortran::lower::AbstractConverter &converter, mlir::Location &loc, Fortran::lower::pft::Evaluation &eval, const Fortran::parser::OmpClauseList *clauses = nullptr, const SmallVector &args = {}, - bool outerCombined = false) { + bool outerCombined = false, + DataSharingProcessor *dsp = nullptr) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); // If an argument for the region is provided then create the block with that // argument. Also update the symbol's address with the mlir argument value. @@ -665,8 +691,14 @@ createBodyOfOp(Op &op, Fortran::lower::AbstractConverter &converter, // Handle privatization. Do not privatize if this is the outer operation. if (clauses && !outerCombined) { - DataSharingProcessor dsp(op, converter, *clauses, eval); - bool lastPrivateOp = dsp.process(); + bool lastPrivateOp = false; + if (!dsp) { + dsp = new DataSharingProcessor(converter, *clauses, eval); + lastPrivateOp = dsp->process(op); + delete dsp; + } else { + lastPrivateOp = dsp->processStep2(op); + } // LastPrivatization, due to introduction of // new control flow, changes the insertion point, // thus restore it. @@ -1536,6 +1568,9 @@ static void genOMP(Fortran::lower::AbstractConverter &converter, TODO(converter.getCurrentLocation(), "Construct enclosing do loop"); } + DataSharingProcessor dsp(converter, loopOpClauseList, eval); + dsp.processStep1(); + // Collect the loops to collapse. auto *doConstructEval = &eval.getFirstNestedEvaluation(); if (doConstructEval->getIf() @@ -1717,7 +1752,8 @@ static void genOMP(Fortran::lower::AbstractConverter &converter, simdlenClauseOperand, safelenClauseOperand, /*inclusive=*/firOpBuilder.getUnitAttr()); createBodyOfOp(simdLoopOp, converter, currentLocation, - eval, &loopOpClauseList, iv); + eval, &loopOpClauseList, iv, + /*outer=*/false, &dsp); return; } @@ -1808,7 +1844,7 @@ static void genOMP(Fortran::lower::AbstractConverter &converter, } createBodyOfOp(wsLoopOp, converter, currentLocation, eval, - &loopOpClauseList, iv); + &loopOpClauseList, iv, /*outer=*/false, &dsp); } static void diff --git a/flang/test/Lower/OpenMP/parallel-lastprivate-clause-scalar.f90 b/flang/test/Lower/OpenMP/parallel-lastprivate-clause-scalar.f90 index 9cadd61d6c35ae..aa95c0712fbb88 100644 --- a/flang/test/Lower/OpenMP/parallel-lastprivate-clause-scalar.f90 +++ b/flang/test/Lower/OpenMP/parallel-lastprivate-clause-scalar.f90 @@ -173,7 +173,7 @@ subroutine firstpriv_lastpriv_int(arg1, arg2) !CHECK: func.func @_QPfirstpriv_lastpriv_int2(%[[ARG1:.*]]: !fir.ref {fir.bindc_name = "arg1"}) { !CHECK: omp.parallel { ! Firstprivate update -!CHECK-NEXT: %[[CLONE1:.*]] = fir.alloca i32 {bindc_name = "arg1" +!CHECK: %[[CLONE1:.*]] = fir.alloca i32 {bindc_name = "arg1" !CHECK-NEXT: %[[FPV_LD:.*]] = fir.load %[[ARG1]] : !fir.ref !CHECK-NEXT: fir.store %[[FPV_LD]] to %[[CLONE1]] : !fir.ref !CHECK-NEXT: omp.barrier diff --git a/flang/test/Lower/OpenMP/parallel-private-clause-fixes.f90 b/flang/test/Lower/OpenMP/parallel-private-clause-fixes.f90 index 0b8bb7ff41c8a8..3152f9c44d0c64 100644 --- a/flang/test/Lower/OpenMP/parallel-private-clause-fixes.f90 +++ b/flang/test/Lower/OpenMP/parallel-private-clause-fixes.f90 @@ -8,8 +8,8 @@ ! CHECK: %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFmultiple_private_fixEx"} ! CHECK: omp.parallel { ! CHECK: %[[PRIV_J:.*]] = fir.alloca i32 {bindc_name = "j", pinned +! CHECK: %[[PRIV_I:.*]] = fir.alloca i32 {adapt.valuebyref, pinned ! CHECK: %[[PRIV_X:.*]] = fir.alloca i32 {bindc_name = "x", pinned -! CHECK: %[[PRIV_I:.*]] = fir.alloca i32 {{{.*}}, pinned ! CHECK: %[[ONE:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_3:.*]] = fir.load %[[VAL_4:.*]] : !fir.ref ! CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32 diff --git a/flang/test/Lower/OpenMP/parallel-wsloop-firstpriv.f90 b/flang/test/Lower/OpenMP/parallel-wsloop-firstpriv.f90 index abc0e5198c5dba..ac63c45677ffeb 100644 --- a/flang/test/Lower/OpenMP/parallel-wsloop-firstpriv.f90 +++ b/flang/test/Lower/OpenMP/parallel-wsloop-firstpriv.f90 @@ -10,10 +10,10 @@ subroutine omp_do_firstprivate(a) n = a+1 !$omp parallel do firstprivate(a) ! CHECK: omp.parallel { + ! CHECK-NEXT: %[[REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} ! CHECK-NEXT: %[[CLONE:.*]] = fir.alloca i32 {bindc_name = "a", pinned ! CHECK-NEXT: %[[LD:.*]] = fir.load %[[ARG0]] : !fir.ref ! CHECK-NEXT: fir.store %[[LD]] to %[[CLONE]] : !fir.ref - ! CHECK-NEXT: %[[REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} ! CHECK: %[[LB:.*]] = arith.constant 1 : i32 ! CHECK-NEXT: %[[UB:.*]] = fir.load %[[CLONE]] : !fir.ref ! CHECK-NEXT: %[[STEP:.*]] = arith.constant 1 : i32 @@ -36,13 +36,13 @@ subroutine omp_do_firstprivate2(a, n) n = a+1 !$omp parallel do firstprivate(a, n) ! CHECK: omp.parallel { + ! CHECK-NEXT: %[[REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} ! CHECK-NEXT: %[[CLONE:.*]] = fir.alloca i32 {bindc_name = "a", pinned ! CHECK-NEXT: %[[LD:.*]] = fir.load %[[ARG0]] : !fir.ref ! CHECK-NEXT: fir.store %[[LD]] to %[[CLONE]] : !fir.ref ! CHECK-NEXT: %[[CLONE1:.*]] = fir.alloca i32 {bindc_name = "n", pinned ! CHECK-NEXT: %[[LD1:.*]] = fir.load %[[ARG1]] : !fir.ref ! CHECK-NEXT: fir.store %[[LD1]] to %[[CLONE1]] : !fir.ref - ! CHECK-NEXT: %[[REF:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} ! CHECK: %[[LB:.*]] = fir.load %[[CLONE]] : !fir.ref diff --git a/flang/test/Lower/OpenMP/parallel-wsloop.f90 b/flang/test/Lower/OpenMP/parallel-wsloop.f90 index 51ee77c82f5a85..c302b91be8e679 100644 --- a/flang/test/Lower/OpenMP/parallel-wsloop.f90 +++ b/flang/test/Lower/OpenMP/parallel-wsloop.f90 @@ -216,11 +216,11 @@ end subroutine parallel_do_private ! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref {fir.bindc_name = "nt"}) { ! CHECK: %[[I_ADDR:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFparallel_do_privateEi"} ! CHECK: omp.parallel { +! CHECK: %[[I_PRIV_ADDR:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} ! CHECK: %[[COND_ADDR:.*]] = fir.alloca !fir.logical<4> {bindc_name = "cond", pinned, uniq_name = "_QFparallel_do_privateEcond"} ! CHECK: %[[NT_ADDR:.*]] = fir.alloca i32 {bindc_name = "nt", pinned, uniq_name = "_QFparallel_do_privateEnt"} ! CHECK: %[[NT:.*]] = fir.load %[[VAL_1]] : !fir.ref ! CHECK: fir.store %[[NT]] to %[[NT_ADDR]] : !fir.ref -! CHECK: %[[I_PRIV_ADDR:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} ! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_8:.*]] = arith.constant 9 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 1 : i32 @@ -256,13 +256,13 @@ end subroutine omp_parallel_do_multiple_firstprivate ! CHECK-SAME: %[[B_ADDR:.*]]: !fir.ref {fir.bindc_name = "b"}) { ! CHECK: %[[I_ADDR:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFomp_parallel_do_multiple_firstprivateEi"} ! CHECK: omp.parallel { +! CHECK: %[[I_PRIV_ADDR:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} ! CHECK: %[[A_PRIV_ADDR:.*]] = fir.alloca i32 {bindc_name = "a", pinned, uniq_name = "_QFomp_parallel_do_multiple_firstprivateEa"} ! CHECK: %[[A:.*]] = fir.load %[[A_ADDR]] : !fir.ref ! CHECK: fir.store %[[A]] to %[[A_PRIV_ADDR]] : !fir.ref ! CHECK: %[[B_PRIV_ADDR:.*]] = fir.alloca i32 {bindc_name = "b", pinned, uniq_name = "_QFomp_parallel_do_multiple_firstprivateEb"} ! CHECK: %[[B:.*]] = fir.load %[[B_ADDR]] : !fir.ref ! CHECK: fir.store %[[B]] to %[[B_PRIV_ADDR]] : !fir.ref -! CHECK: %[[I_PRIV_ADDR:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} ! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32 diff --git a/flang/test/Lower/OpenMP/single.f90 b/flang/test/Lower/OpenMP/single.f90 index d4139ce0fe43b1..22d9bb1d5d70ed 100644 --- a/flang/test/Lower/OpenMP/single.f90 +++ b/flang/test/Lower/OpenMP/single.f90 @@ -72,9 +72,9 @@ end subroutine single_allocate ! CHECK-LABEL: func.func @_QPsingle_privatization( ! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref {fir.bindc_name = "x"}, ! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref {fir.bindc_name = "y"}) { -! CHECK: %[[VAL_2:.*]] = fir.alloca f32 {bindc_name = "x", pinned, uniq_name = "_QFsingle_privatizationEx"} -! CHECK: %[[VAL_3:.*]] = fir.alloca f64 {bindc_name = "y", pinned, uniq_name = "_QFsingle_privatizationEy"} ! CHECK: omp.single { +! CHECK: %[[VAL_2:.*]] = fir.alloca f32 {bindc_name = "x", pinned, uniq_name = "_QFsingle_privatizationEx"} +! CHECK: %[[VAL_3:.*]] = fir.alloca f64 {bindc_name = "y", pinned, uniq_name = "_QFsingle_privatizationEy"} ! CHECK: %[[VAL_4:.*]] = fir.load %[[VAL_1]] : !fir.ref ! CHECK: fir.store %[[VAL_4]] to %[[VAL_3]] : !fir.ref ! CHECK: fir.call @_QPbar(%[[VAL_2]], %[[VAL_3]]) {{.*}}: (!fir.ref, !fir.ref) -> () @@ -96,9 +96,9 @@ subroutine single_privatization(x, y) ! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref {fir.bindc_name = "x"}, ! CHECK-SAME: %[[VAL_1:.*]]: !fir.ref {fir.bindc_name = "y"}) { ! CHECK: omp.parallel { -! CHECK: %[[VAL_2:.*]] = fir.alloca f32 {bindc_name = "x", pinned, uniq_name = "_QFsingle_privatization2Ex"} -! CHECK: %[[VAL_3:.*]] = fir.alloca f64 {bindc_name = "y", pinned, uniq_name = "_QFsingle_privatization2Ey"} ! CHECK: omp.single { +! CHECK: %[[VAL_2:.*]] = fir.alloca f32 {bindc_name = "x", pinned, uniq_name = "_QFsingle_privatization2Ex"} +! CHECK: %[[VAL_3:.*]] = fir.alloca f64 {bindc_name = "y", pinned, uniq_name = "_QFsingle_privatization2Ey"} ! CHECK: %[[VAL_4:.*]] = fir.load %[[VAL_1]] : !fir.ref ! CHECK: fir.store %[[VAL_4]] to %[[VAL_3]] : !fir.ref ! CHECK: fir.call @_QPbar(%[[VAL_2]], %[[VAL_3]]) {{.*}}: (!fir.ref, !fir.ref) -> ()