diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp index ae0d8bd37228d..4c51b61f6bf02 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp @@ -832,8 +832,8 @@ createMapInfoOp(fir::FirOpBuilder &builder, mlir::Location loc, } bool ClauseProcessor::processMap( - mlir::Location currentLocation, const llvm::omp::Directive &directive, - Fortran::lower::StatementContext &stmtCtx, mlir::omp::MapClauseOps &result, + mlir::Location currentLocation, Fortran::lower::StatementContext &stmtCtx, + mlir::omp::MapClauseOps &result, llvm::SmallVectorImpl *mapSyms, llvm::SmallVectorImpl *mapSymLocs, llvm::SmallVectorImpl *mapSymTypes) const { diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h b/flang/lib/Lower/OpenMP/ClauseProcessor.h index aa2c14b61e756..3f9701310ebae 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.h +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h @@ -114,8 +114,7 @@ class ClauseProcessor { // They may be used later on to create the block_arguments for some of the // target directives that require it. bool processMap( - mlir::Location currentLocation, const llvm::omp::Directive &directive, - Fortran::lower::StatementContext &stmtCtx, + mlir::Location currentLocation, Fortran::lower::StatementContext &stmtCtx, mlir::omp::MapClauseOps &result, llvm::SmallVectorImpl *mapSyms = nullptr, diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 352ca66e8735b..9b99752236662 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -222,6 +222,276 @@ createAndSetPrivatizedLoopVar(Fortran::lower::AbstractConverter &converter, return storeOp; } +// This helper function implements the functionality of "promoting" +// non-CPTR arguments of use_device_ptr to use_device_addr +// arguments (automagic conversion of use_device_ptr -> +// use_device_addr in these cases). The way we do so currently is +// through the shuffling of operands from the devicePtrOperands to +// deviceAddrOperands where neccesary and re-organizing the types, +// locations and symbols to maintain the correct ordering of ptr/addr +// input -> BlockArg. +// +// This effectively implements some deprecated OpenMP functionality +// that some legacy applications unfortunately depend on +// (deprecated in specification version 5.2): +// +// "If a list item in a use_device_ptr clause is not of type C_PTR, +// the behavior is as if the list item appeared in a use_device_addr +// clause. Support for such list items in a use_device_ptr clause +// is deprecated." +static void promoteNonCPtrUseDevicePtrArgsToUseDeviceAddr( + mlir::omp::UseDeviceClauseOps &clauseOps, + llvm::SmallVectorImpl &useDeviceTypes, + llvm::SmallVectorImpl &useDeviceLocs, + llvm::SmallVectorImpl + &useDeviceSymbols) { + auto moveElementToBack = [](size_t idx, auto &vector) { + auto *iter = std::next(vector.begin(), idx); + vector.push_back(*iter); + vector.erase(iter); + }; + + // Iterate over our use_device_ptr list and shift all non-cptr arguments into + // use_device_addr. + for (auto *it = clauseOps.useDevicePtrVars.begin(); + it != clauseOps.useDevicePtrVars.end();) { + if (!fir::isa_builtin_cptr_type(fir::unwrapRefType(it->getType()))) { + clauseOps.useDeviceAddrVars.push_back(*it); + // We have to shuffle the symbols around as well, to maintain + // the correct Input -> BlockArg for use_device_ptr/use_device_addr. + // NOTE: However, as map's do not seem to be included currently + // this isn't as pertinent, but we must try to maintain for + // future alterations. I believe the reason they are not currently + // is that the BlockArg assign/lowering needs to be extended + // to a greater set of types. + auto idx = std::distance(clauseOps.useDevicePtrVars.begin(), it); + moveElementToBack(idx, useDeviceTypes); + moveElementToBack(idx, useDeviceLocs); + moveElementToBack(idx, useDeviceSymbols); + it = clauseOps.useDevicePtrVars.erase(it); + continue; + } + ++it; + } +} + +/// Extract the list of function and variable symbols affected by the given +/// 'declare target' directive and return the intended device type for them. +static void getDeclareTargetInfo( + Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OpenMPDeclareTargetConstruct &declareTargetConstruct, + mlir::omp::DeclareTargetClauseOps &clauseOps, + llvm::SmallVectorImpl &symbolAndClause) { + const auto &spec = std::get( + declareTargetConstruct.t); + if (const auto *objectList{ + Fortran::parser::Unwrap(spec.u)}) { + ObjectList objects{makeObjects(*objectList, semaCtx)}; + // Case: declare target(func, var1, var2) + gatherFuncAndVarSyms(objects, mlir::omp::DeclareTargetCaptureClause::to, + symbolAndClause); + } else if (const auto *clauseList{ + Fortran::parser::Unwrap( + spec.u)}) { + if (clauseList->v.empty()) { + // Case: declare target, implicit capture of function + symbolAndClause.emplace_back( + mlir::omp::DeclareTargetCaptureClause::to, + eval.getOwningProcedure()->getSubprogramSymbol()); + } + + ClauseProcessor cp(converter, semaCtx, *clauseList); + cp.processDeviceType(clauseOps); + cp.processEnter(symbolAndClause); + cp.processLink(symbolAndClause); + cp.processTo(symbolAndClause); + + cp.processTODO(converter.getCurrentLocation(), + llvm::omp::Directive::OMPD_declare_target); + } +} + +static void collectDeferredDeclareTargets( + Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OpenMPDeclareTargetConstruct &declareTargetConstruct, + llvm::SmallVectorImpl + &deferredDeclareTarget) { + mlir::omp::DeclareTargetClauseOps clauseOps; + llvm::SmallVector symbolAndClause; + getDeclareTargetInfo(converter, semaCtx, eval, declareTargetConstruct, + clauseOps, symbolAndClause); + // Return the device type only if at least one of the targets for the + // directive is a function or subroutine + mlir::ModuleOp mod = converter.getFirOpBuilder().getModule(); + + for (const DeclareTargetCapturePair &symClause : symbolAndClause) { + mlir::Operation *op = mod.lookupSymbol(converter.mangleName( + std::get(symClause))); + + if (!op) { + deferredDeclareTarget.push_back({std::get<0>(symClause), + clauseOps.deviceType, + std::get<1>(symClause)}); + } + } +} + +static std::optional +getDeclareTargetFunctionDevice( + Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OpenMPDeclareTargetConstruct + &declareTargetConstruct) { + mlir::omp::DeclareTargetClauseOps clauseOps; + llvm::SmallVector symbolAndClause; + getDeclareTargetInfo(converter, semaCtx, eval, declareTargetConstruct, + clauseOps, symbolAndClause); + + // Return the device type only if at least one of the targets for the + // directive is a function or subroutine + mlir::ModuleOp mod = converter.getFirOpBuilder().getModule(); + for (const DeclareTargetCapturePair &symClause : symbolAndClause) { + mlir::Operation *op = mod.lookupSymbol(converter.mangleName( + std::get(symClause))); + + if (mlir::isa_and_nonnull(op)) + return clauseOps.deviceType; + } + + return std::nullopt; +} + +static llvm::SmallVector +genLoopVars(mlir::Operation *op, Fortran::lower::AbstractConverter &converter, + mlir::Location &loc, + llvm::ArrayRef args) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + auto ®ion = op->getRegion(0); + + std::size_t loopVarTypeSize = 0; + for (const Fortran::semantics::Symbol *arg : args) + loopVarTypeSize = std::max(loopVarTypeSize, arg->GetUltimate().size()); + mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize); + llvm::SmallVector tiv(args.size(), loopVarType); + llvm::SmallVector locs(args.size(), loc); + firOpBuilder.createBlock(®ion, {}, tiv, locs); + // The argument is not currently in memory, so make a temporary for the + // argument, and store it there, then bind that location to the argument. + mlir::Operation *storeOp = nullptr; + for (auto [argIndex, argSymbol] : llvm::enumerate(args)) { + mlir::Value indexVal = fir::getBase(region.front().getArgument(argIndex)); + storeOp = + createAndSetPrivatizedLoopVar(converter, loc, indexVal, argSymbol); + } + firOpBuilder.setInsertionPointAfter(storeOp); + return llvm::SmallVector(args); +} + +static void genReductionVars( + mlir::Operation *op, Fortran::lower::AbstractConverter &converter, + mlir::Location &loc, + llvm::ArrayRef reductionArgs, + llvm::ArrayRef reductionTypes) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + llvm::SmallVector blockArgLocs(reductionArgs.size(), loc); + + mlir::Block *entryBlock = firOpBuilder.createBlock( + &op->getRegion(0), {}, reductionTypes, blockArgLocs); + + // Bind the reduction arguments to their block arguments. + for (auto [arg, prv] : + llvm::zip_equal(reductionArgs, entryBlock->getArguments())) { + converter.bindSymbol(*arg, prv); + } +} + +static llvm::SmallVector +genLoopAndReductionVars( + mlir::Operation *op, Fortran::lower::AbstractConverter &converter, + mlir::Location &loc, + llvm::ArrayRef loopArgs, + llvm::ArrayRef reductionArgs, + llvm::ArrayRef reductionTypes) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + + llvm::SmallVector blockArgTypes; + llvm::SmallVector blockArgLocs; + blockArgTypes.reserve(loopArgs.size() + reductionArgs.size()); + blockArgLocs.reserve(blockArgTypes.size()); + mlir::Block *entryBlock; + + if (loopArgs.size()) { + std::size_t loopVarTypeSize = 0; + for (const Fortran::semantics::Symbol *arg : loopArgs) + loopVarTypeSize = std::max(loopVarTypeSize, arg->GetUltimate().size()); + mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize); + std::fill_n(std::back_inserter(blockArgTypes), loopArgs.size(), + loopVarType); + std::fill_n(std::back_inserter(blockArgLocs), loopArgs.size(), loc); + } + if (reductionArgs.size()) { + llvm::copy(reductionTypes, std::back_inserter(blockArgTypes)); + std::fill_n(std::back_inserter(blockArgLocs), reductionArgs.size(), loc); + } + entryBlock = firOpBuilder.createBlock(&op->getRegion(0), {}, blockArgTypes, + blockArgLocs); + // The argument is not currently in memory, so make a temporary for the + // argument, and store it there, then bind that location to the argument. + if (loopArgs.size()) { + mlir::Operation *storeOp = nullptr; + for (auto [argIndex, argSymbol] : llvm::enumerate(loopArgs)) { + mlir::Value indexVal = + fir::getBase(op->getRegion(0).front().getArgument(argIndex)); + storeOp = + createAndSetPrivatizedLoopVar(converter, loc, indexVal, argSymbol); + } + firOpBuilder.setInsertionPointAfter(storeOp); + } + // Bind the reduction arguments to their block arguments + for (auto [arg, prv] : llvm::zip_equal( + reductionArgs, + llvm::drop_begin(entryBlock->getArguments(), loopArgs.size()))) { + converter.bindSymbol(*arg, prv); + } + + return llvm::SmallVector(loopArgs); +} + +static void +markDeclareTarget(mlir::Operation *op, + Fortran::lower::AbstractConverter &converter, + mlir::omp::DeclareTargetCaptureClause captureClause, + mlir::omp::DeclareTargetDeviceType deviceType) { + // TODO: Add support for program local variables with declare target applied + auto declareTargetOp = llvm::dyn_cast(op); + if (!declareTargetOp) + fir::emitFatalError( + converter.getCurrentLocation(), + "Attempt to apply declare target on unsupported operation"); + + // The function or global already has a declare target applied to it, very + // likely through implicit capture (usage in another declare target + // function/subroutine). It should be marked as any if it has been assigned + // both host and nohost, else we skip, as there is no change + if (declareTargetOp.isDeclareTarget()) { + if (declareTargetOp.getDeclareTargetDeviceType() != deviceType) + declareTargetOp.setDeclareTarget(mlir::omp::DeclareTargetDeviceType::any, + captureClause); + return; + } + + declareTargetOp.setDeclareTarget(deviceType, captureClause); +} + +//===----------------------------------------------------------------------===// +// Op body generation helper structures and functions +//===----------------------------------------------------------------------===// + struct OpWithBodyGenInfo { /// A type for a code-gen callback function. This takes as argument the op for /// which the code is being generated and returns the arguments of the op's @@ -493,548 +763,737 @@ static void genBodyOfTargetDataOp( genNestedEvaluations(converter, eval); } -template -static OpTy genOpWithBody(OpWithBodyGenInfo &info, Args &&...args) { - auto op = info.converter.getFirOpBuilder().create( - info.loc, std::forward(args)...); - createBodyOfOp(op, info); - return op; -} - -static mlir::omp::MasterOp -genMasterOp(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, bool genNested, - mlir::Location currentLocation) { - return genOpWithBody( - OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) - .setGenNested(genNested)); -} - -static mlir::omp::OrderedRegionOp -genOrderedRegionOp(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, bool genNested, - mlir::Location currentLocation, - const Fortran::parser::OmpClauseList &clauseList) { - mlir::omp::OrderedRegionClauseOps clauseOps; - - ClauseProcessor cp(converter, semaCtx, clauseList); - cp.processTODO(currentLocation, - llvm::omp::Directive::OMPD_ordered); - - return genOpWithBody( - OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) - .setGenNested(genNested), - clauseOps); -} +// This functions creates a block for the body of the targetOp's region. It adds +// all the symbols present in mapSymbols as block arguments to this block. +static void +genBodyOfTargetOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool genNested, + mlir::omp::TargetOp &targetOp, + llvm::ArrayRef mapSyms, + llvm::ArrayRef mapSymLocs, + llvm::ArrayRef mapSymTypes, + const mlir::Location ¤tLocation) { + assert(mapSymTypes.size() == mapSymLocs.size()); -static mlir::omp::ParallelOp -genParallelOp(Fortran::lower::AbstractConverter &converter, - Fortran::lower::SymMap &symTable, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, bool genNested, - mlir::Location currentLocation, - const Fortran::parser::OmpClauseList &clauseList, - bool outerCombined = false) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - Fortran::lower::StatementContext stmtCtx; - mlir::omp::ParallelClauseOps clauseOps; - llvm::SmallVector privateSyms; - llvm::SmallVector reductionTypes; - llvm::SmallVector reductionSyms; - - ClauseProcessor cp(converter, semaCtx, clauseList); - cp.processIf(llvm::omp::Directive::OMPD_parallel, clauseOps); - cp.processNumThreads(stmtCtx, clauseOps); - cp.processProcBind(clauseOps); - cp.processDefault(); - cp.processAllocate(clauseOps); - - if (!outerCombined) - cp.processReduction(currentLocation, clauseOps, &reductionTypes, - &reductionSyms); + mlir::Region ®ion = targetOp.getRegion(); - if (ReductionProcessor::doReductionByRef(clauseOps.reductionVars)) - clauseOps.reductionByRefAttr = firOpBuilder.getUnitAttr(); + auto *regionBlock = + firOpBuilder.createBlock(®ion, {}, mapSymTypes, mapSymLocs); - auto reductionCallback = [&](mlir::Operation *op) { - llvm::SmallVector locs(clauseOps.reductionVars.size(), - currentLocation); - auto *block = - firOpBuilder.createBlock(&op->getRegion(0), {}, reductionTypes, locs); - for (auto [arg, prv] : - llvm::zip_equal(reductionSyms, block->getArguments())) { - converter.bindSymbol(*arg, prv); + // Clones the `bounds` placing them inside the target region and returns them. + auto cloneBound = [&](mlir::Value bound) { + if (mlir::isMemoryEffectFree(bound.getDefiningOp())) { + mlir::Operation *clonedOp = bound.getDefiningOp()->clone(); + regionBlock->push_back(clonedOp); + return clonedOp->getResult(0); } - return reductionSyms; + TODO(converter.getCurrentLocation(), + "target map clause operand unsupported bound type"); }; - OpWithBodyGenInfo genInfo = - OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) - .setGenNested(genNested) - .setOuterCombined(outerCombined) - .setClauses(&clauseList) - .setReductions(&reductionSyms, &reductionTypes) - .setGenRegionEntryCb(reductionCallback); + auto cloneBounds = [cloneBound](llvm::ArrayRef bounds) { + llvm::SmallVector clonedBounds; + for (mlir::Value bound : bounds) + clonedBounds.emplace_back(cloneBound(bound)); + return clonedBounds; + }; - if (!enableDelayedPrivatization) - return genOpWithBody(genInfo, clauseOps); + // Bind the symbols to their corresponding block arguments. + for (auto [argIndex, argSymbol] : llvm::enumerate(mapSyms)) { + const mlir::BlockArgument &arg = region.getArgument(argIndex); + // Avoid capture of a reference to a structured binding. + const Fortran::semantics::Symbol *sym = argSymbol; + // Structure component symbols don't have bindings. + if (sym->owner().IsDerivedType()) + continue; + fir::ExtendedValue extVal = converter.getSymbolExtendedValue(*sym); + extVal.match( + [&](const fir::BoxValue &v) { + converter.bindSymbol(*sym, + fir::BoxValue(arg, cloneBounds(v.getLBounds()), + v.getExplicitParameters(), + v.getExplicitExtents())); + }, + [&](const fir::MutableBoxValue &v) { + converter.bindSymbol( + *sym, fir::MutableBoxValue(arg, cloneBounds(v.getLBounds()), + v.getMutableProperties())); + }, + [&](const fir::ArrayBoxValue &v) { + converter.bindSymbol( + *sym, fir::ArrayBoxValue(arg, cloneBounds(v.getExtents()), + cloneBounds(v.getLBounds()), + v.getSourceBox())); + }, + [&](const fir::CharArrayBoxValue &v) { + converter.bindSymbol( + *sym, fir::CharArrayBoxValue(arg, cloneBound(v.getLen()), + cloneBounds(v.getExtents()), + cloneBounds(v.getLBounds()))); + }, + [&](const fir::CharBoxValue &v) { + converter.bindSymbol(*sym, + fir::CharBoxValue(arg, cloneBound(v.getLen()))); + }, + [&](const fir::UnboxedValue &v) { converter.bindSymbol(*sym, arg); }, + [&](const auto &) { + TODO(converter.getCurrentLocation(), + "target map clause operand unsupported type"); + }); + } - bool privatize = !outerCombined; - DataSharingProcessor dsp(converter, semaCtx, clauseList, eval, - /*useDelayedPrivatization=*/true, &symTable); + // Check if cloning the bounds introduced any dependency on the outer region. + // If so, then either clone them as well if they are MemoryEffectFree, or else + // copy them to a new temporary and add them to the map and block_argument + // lists and replace their uses with the new temporary. + llvm::SetVector valuesDefinedAbove; + mlir::getUsedValuesDefinedAbove(region, valuesDefinedAbove); + while (!valuesDefinedAbove.empty()) { + for (mlir::Value val : valuesDefinedAbove) { + mlir::Operation *valOp = val.getDefiningOp(); + if (mlir::isMemoryEffectFree(valOp)) { + mlir::Operation *clonedOp = valOp->clone(); + regionBlock->push_front(clonedOp); + val.replaceUsesWithIf( + clonedOp->getResult(0), [regionBlock](mlir::OpOperand &use) { + return use.getOwner()->getBlock() == regionBlock; + }); + } else { + auto savedIP = firOpBuilder.getInsertionPoint(); + firOpBuilder.setInsertionPointAfter(valOp); + auto copyVal = + firOpBuilder.createTemporary(val.getLoc(), val.getType()); + firOpBuilder.createStoreWithConvert(copyVal.getLoc(), val, copyVal); - if (privatize) - dsp.processStep1(&clauseOps, &privateSyms); + llvm::SmallVector bounds; + std::stringstream name; + firOpBuilder.setInsertionPoint(targetOp); + mlir::Value mapOp = createMapInfoOp( + firOpBuilder, copyVal.getLoc(), copyVal, mlir::Value{}, name.str(), + bounds, llvm::SmallVector{}, + static_cast< + std::underlying_type_t>( + llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT), + mlir::omp::VariableCaptureKind::ByCopy, copyVal.getType()); + targetOp.getMapOperandsMutable().append(mapOp); + mlir::Value clonedValArg = + region.addArgument(copyVal.getType(), copyVal.getLoc()); + firOpBuilder.setInsertionPointToStart(regionBlock); + auto loadOp = firOpBuilder.create(clonedValArg.getLoc(), + clonedValArg); + val.replaceUsesWithIf( + loadOp->getResult(0), [regionBlock](mlir::OpOperand &use) { + return use.getOwner()->getBlock() == regionBlock; + }); + firOpBuilder.setInsertionPoint(regionBlock, savedIP); + } + } + valuesDefinedAbove.clear(); + mlir::getUsedValuesDefinedAbove(region, valuesDefinedAbove); + } - auto genRegionEntryCB = [&](mlir::Operation *op) { - auto parallelOp = llvm::cast(op); + // Insert dummy instruction to remember the insertion position. The + // marker will be deleted since there are not uses. + // In the HLFIR flow there are hlfir.declares inserted above while + // setting block arguments. + mlir::Value undefMarker = firOpBuilder.create( + targetOp.getOperation()->getLoc(), firOpBuilder.getIndexType()); - llvm::SmallVector reductionLocs( - clauseOps.reductionVars.size(), currentLocation); + // Create blocks for unstructured regions. This has to be done since + // blocks are initially allocated with the function as the parent region. + if (eval.lowerAsUnstructured()) { + Fortran::lower::createEmptyRegionBlocks( + firOpBuilder, eval.getNestedEvaluations()); + } - mlir::OperandRange privateVars = parallelOp.getPrivateVars(); - mlir::Region ®ion = parallelOp.getRegion(); + firOpBuilder.create(currentLocation); - llvm::SmallVector privateVarTypes = reductionTypes; - privateVarTypes.reserve(privateVarTypes.size() + privateVars.size()); - llvm::transform(privateVars, std::back_inserter(privateVarTypes), - [](mlir::Value v) { return v.getType(); }); + // Create the insertion point after the marker. + firOpBuilder.setInsertionPointAfter(undefMarker.getDefiningOp()); + if (genNested) + genNestedEvaluations(converter, eval); +} - llvm::SmallVector privateVarLocs = reductionLocs; - privateVarLocs.reserve(privateVarLocs.size() + privateVars.size()); - llvm::transform(privateVars, std::back_inserter(privateVarLocs), - [](mlir::Value v) { return v.getLoc(); }); +template +static OpTy genOpWithBody(OpWithBodyGenInfo &info, Args &&...args) { + auto op = info.converter.getFirOpBuilder().create( + info.loc, std::forward(args)...); + createBodyOfOp(op, info); + return op; +} - firOpBuilder.createBlock(®ion, /*insertPt=*/{}, privateVarTypes, - privateVarLocs); +//===----------------------------------------------------------------------===// +// Code generation functions for clauses +//===----------------------------------------------------------------------===// - llvm::SmallVector allSymbols = - reductionSyms; - allSymbols.append(privateSyms); - for (auto [arg, prv] : llvm::zip_equal(allSymbols, region.getArguments())) { - converter.bindSymbol(*arg, prv); - } +static void genCriticalDeclareClauses( + Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + const Fortran::parser::OmpClauseList &clauses, mlir::Location loc, + mlir::omp::CriticalClauseOps &clauseOps, llvm::StringRef name) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processHint(clauseOps); + clauseOps.nameAttr = + mlir::StringAttr::get(converter.getFirOpBuilder().getContext(), name); +} - return allSymbols; - }; +static void genFlushClauses( + Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + const std::optional &objects, + const std::optional> + &clauses, + mlir::Location loc, llvm::SmallVectorImpl &operandRange) { + if (objects) + genObjectList2(*objects, converter, operandRange); + + if (clauses && clauses->size() > 0) + TODO(converter.getCurrentLocation(), "Handle OmpMemoryOrderClause"); +} - // TODO Merge with the reduction CB. - genInfo.setGenRegionEntryCb(genRegionEntryCB).setDataSharingProcessor(&dsp); - return genOpWithBody(genInfo, clauseOps); +static void +genOrderedRegionClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, + mlir::omp::OrderedRegionClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processTODO(loc, llvm::omp::Directive::OMPD_ordered); +} + +static void genParallelClauses( + Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::StatementContext &stmtCtx, + const Fortran::parser::OmpClauseList &clauses, mlir::Location loc, + bool processReduction, mlir::omp::ParallelClauseOps &clauseOps, + llvm::SmallVectorImpl &reductionTypes, + llvm::SmallVectorImpl &reductionSyms) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processAllocate(clauseOps); + cp.processDefault(); + cp.processIf(llvm::omp::Directive::OMPD_parallel, clauseOps); + cp.processNumThreads(stmtCtx, clauseOps); + cp.processProcBind(clauseOps); + + if (processReduction) { + cp.processReduction(loc, clauseOps, &reductionTypes, &reductionSyms); + if (ReductionProcessor::doReductionByRef(clauseOps.reductionVars)) + clauseOps.reductionByRefAttr = converter.getFirOpBuilder().getUnitAttr(); + } } -static mlir::omp::SectionOp -genSectionOp(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, bool genNested, - mlir::Location currentLocation, - const Fortran::parser::OmpClauseList §ionsClauseList) { - // Currently only private/firstprivate clause is handled, and - // all privatization is done within `omp.section` operations. - return genOpWithBody( - OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) - .setGenNested(genNested) - .setClauses(§ionsClauseList)); +static void genSectionsClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, + bool clausesFromBeginSections, + mlir::omp::SectionsClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + if (clausesFromBeginSections) { + cp.processAllocate(clauseOps); + cp.processSectionsReduction(loc, clauseOps); + // TODO Support delayed privatization. + } else { + cp.processNowait(clauseOps); + } } -static mlir::omp::SingleOp -genSingleOp(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, bool genNested, - mlir::Location currentLocation, - const Fortran::parser::OmpClauseList &beginClauseList, - const Fortran::parser::OmpClauseList &endClauseList) { - mlir::omp::SingleClauseOps clauseOps; +static void genSimdLoopClauses( + Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::StatementContext &stmtCtx, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OmpClauseList &clauses, mlir::Location loc, + mlir::omp::SimdLoopClauseOps &clauseOps, + llvm::SmallVectorImpl &iv) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processCollapse(loc, eval, clauseOps, iv); + cp.processIf(llvm::omp::Directive::OMPD_simd, clauseOps); + cp.processReduction(loc, clauseOps); + cp.processSafelen(clauseOps); + cp.processSimdlen(clauseOps); + clauseOps.loopInclusiveAttr = converter.getFirOpBuilder().getUnitAttr(); + // TODO Support delayed privatization. - ClauseProcessor cp(converter, semaCtx, beginClauseList); - cp.processAllocate(clauseOps); + cp.processTODO( + loc, llvm::omp::Directive::OMPD_simd); +} + +static void genSingleClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + const Fortran::parser::OmpClauseList &beginClauses, + const Fortran::parser::OmpClauseList &endClauses, + mlir::Location loc, + mlir::omp::SingleClauseOps &clauseOps) { + ClauseProcessor bcp(converter, semaCtx, beginClauses); + bcp.processAllocate(clauseOps); // TODO Support delayed privatization. - ClauseProcessor ecp(converter, semaCtx, endClauseList); + ClauseProcessor ecp(converter, semaCtx, endClauses); + ecp.processCopyprivate(loc, clauseOps); ecp.processNowait(clauseOps); - ecp.processCopyprivate(currentLocation, clauseOps); +} - return genOpWithBody( - OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) - .setGenNested(genNested) - .setClauses(&beginClauseList), - clauseOps); +static void genTargetClauses( + Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::StatementContext &stmtCtx, + const Fortran::parser::OmpClauseList &clauses, mlir::Location loc, + bool processHostOnlyClauses, bool processReduction, + mlir::omp::TargetClauseOps &clauseOps, + llvm::SmallVectorImpl &mapSyms, + llvm::SmallVectorImpl &mapLocs, + llvm::SmallVectorImpl &mapTypes, + llvm::SmallVectorImpl &deviceAddrSyms, + llvm::SmallVectorImpl &deviceAddrLocs, + llvm::SmallVectorImpl &deviceAddrTypes, + llvm::SmallVectorImpl &devicePtrSyms, + llvm::SmallVectorImpl &devicePtrLocs, + llvm::SmallVectorImpl &devicePtrTypes) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processDepend(clauseOps); + cp.processDevice(stmtCtx, clauseOps); + cp.processHasDeviceAddr(clauseOps, deviceAddrTypes, deviceAddrLocs, + deviceAddrSyms); + cp.processIf(llvm::omp::Directive::OMPD_target, clauseOps); + cp.processIsDevicePtr(clauseOps, devicePtrTypes, devicePtrLocs, + devicePtrSyms); + cp.processMap(loc, stmtCtx, clauseOps, &mapSyms, &mapLocs, &mapTypes); + cp.processThreadLimit(stmtCtx, clauseOps); + // TODO Support delayed privatization. + + if (processHostOnlyClauses) + cp.processNowait(clauseOps); + + cp.processTODO(loc, + llvm::omp::Directive::OMPD_target); } -static mlir::omp::TaskOp -genTaskOp(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, bool genNested, - mlir::Location currentLocation, - const Fortran::parser::OmpClauseList &clauseList) { - Fortran::lower::StatementContext stmtCtx; - mlir::omp::TaskClauseOps clauseOps; +static void genTargetDataClauses( + Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::StatementContext &stmtCtx, + const Fortran::parser::OmpClauseList &clauses, mlir::Location loc, + mlir::omp::TargetDataClauseOps &clauseOps, + llvm::SmallVectorImpl &useDeviceTypes, + llvm::SmallVectorImpl &useDeviceLocs, + llvm::SmallVectorImpl &useDeviceSyms) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processDevice(stmtCtx, clauseOps); + cp.processIf(llvm::omp::Directive::OMPD_target_data, clauseOps); + cp.processMap(loc, stmtCtx, clauseOps); + cp.processUseDeviceAddr(clauseOps, useDeviceTypes, useDeviceLocs, + useDeviceSyms); + cp.processUseDevicePtr(clauseOps, useDeviceTypes, useDeviceLocs, + useDeviceSyms); - ClauseProcessor cp(converter, semaCtx, clauseList); - cp.processIf(llvm::omp::Directive::OMPD_task, clauseOps); + // This function implements the deprecated functionality of use_device_ptr + // that allows users to provide non-CPTR arguments to it with the caveat + // that the compiler will treat them as use_device_addr. A lot of legacy + // code may still depend on this functionality, so we should support it + // in some manner. We do so currently by simply shifting non-cptr operands + // from the use_device_ptr list into the front of the use_device_addr list + // whilst maintaining the ordering of useDeviceLocs, useDeviceSyms and + // useDeviceTypes to use_device_ptr/use_device_addr input for BlockArg + // ordering. + // TODO: Perhaps create a user provideable compiler option that will + // re-introduce a hard-error rather than a warning in these cases. + promoteNonCPtrUseDevicePtrArgsToUseDeviceAddr(clauseOps, useDeviceTypes, + useDeviceLocs, useDeviceSyms); +} + +static void genTargetEnterExitUpdateDataClauses( + Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::StatementContext &stmtCtx, + const Fortran::parser::OmpClauseList &clauses, mlir::Location loc, + llvm::omp::Directive directive, + mlir::omp::TargetEnterExitUpdateDataClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processDepend(clauseOps); + cp.processDevice(stmtCtx, clauseOps); + cp.processIf(directive, clauseOps); + cp.processNowait(clauseOps); + + if (directive == llvm::omp::Directive::OMPD_target_update) { + cp.processMotionClauses(stmtCtx, clauseOps); + cp.processMotionClauses(stmtCtx, clauseOps); + } else { + cp.processMap(loc, stmtCtx, clauseOps); + } +} + +static void genTaskClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::StatementContext &stmtCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, + mlir::omp::TaskClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); cp.processAllocate(clauseOps); cp.processDefault(); + cp.processDepend(clauseOps); cp.processFinal(stmtCtx, clauseOps); - cp.processUntied(clauseOps); + cp.processIf(llvm::omp::Directive::OMPD_task, clauseOps); cp.processMergeable(clauseOps); cp.processPriority(stmtCtx, clauseOps); - cp.processDepend(clauseOps); + cp.processUntied(clauseOps); // TODO Support delayed privatization. - cp.processTODO( - currentLocation, llvm::omp::Directive::OMPD_task); + cp.processTODO( + loc, llvm::omp::Directive::OMPD_task); +} - return genOpWithBody( - OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) - .setGenNested(genNested) - .setClauses(&clauseList), - clauseOps); +static void genTaskgroupClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, + mlir::omp::TaskgroupClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processAllocate(clauseOps); + + cp.processTODO(loc, + llvm::omp::Directive::OMPD_taskgroup); } -static mlir::omp::TaskgroupOp -genTaskgroupOp(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, bool genNested, - mlir::Location currentLocation, - const Fortran::parser::OmpClauseList &clauseList) { - mlir::omp::TaskgroupClauseOps clauseOps; +static void genTaskwaitClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, + mlir::omp::TaskwaitClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); + cp.processTODO( + loc, llvm::omp::Directive::OMPD_taskwait); +} - ClauseProcessor cp(converter, semaCtx, clauseList); +static void genTeamsClauses(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::StatementContext &stmtCtx, + const Fortran::parser::OmpClauseList &clauses, + mlir::Location loc, + mlir::omp::TeamsClauseOps &clauseOps) { + ClauseProcessor cp(converter, semaCtx, clauses); cp.processAllocate(clauseOps); - cp.processTODO(currentLocation, - llvm::omp::Directive::OMPD_taskgroup); + cp.processDefault(); + cp.processIf(llvm::omp::Directive::OMPD_teams, clauseOps); + cp.processNumTeams(stmtCtx, clauseOps); + cp.processThreadLimit(stmtCtx, clauseOps); + // TODO Support delayed privatization. - return genOpWithBody( - OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) - .setGenNested(genNested) - .setClauses(&clauseList), - clauseOps); + cp.processTODO(loc, llvm::omp::Directive::OMPD_teams); } -// This helper function implements the functionality of "promoting" -// non-CPTR arguments of use_device_ptr to use_device_addr -// arguments (automagic conversion of use_device_ptr -> -// use_device_addr in these cases). The way we do so currently is -// through the shuffling of operands from the devicePtrOperands to -// deviceAddrOperands where neccesary and re-organizing the types, -// locations and symbols to maintain the correct ordering of ptr/addr -// input -> BlockArg. -// -// This effectively implements some deprecated OpenMP functionality -// that some legacy applications unfortunately depend on -// (deprecated in specification version 5.2): -// -// "If a list item in a use_device_ptr clause is not of type C_PTR, -// the behavior is as if the list item appeared in a use_device_addr -// clause. Support for such list items in a use_device_ptr clause -// is deprecated." -static void promoteNonCPtrUseDevicePtrArgsToUseDeviceAddr( - mlir::omp::UseDeviceClauseOps &clauseOps, - llvm::SmallVectorImpl &useDeviceTypes, - llvm::SmallVectorImpl &useDeviceLocs, - llvm::SmallVectorImpl - &useDeviceSymbols) { - auto moveElementToBack = [](size_t idx, auto &vector) { - auto *iter = std::next(vector.begin(), idx); - vector.push_back(*iter); - vector.erase(iter); - }; +static void genWsloopClauses( + Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::StatementContext &stmtCtx, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OmpClauseList &beginClauses, + const Fortran::parser::OmpClauseList *endClauses, mlir::Location loc, + mlir::omp::WsloopClauseOps &clauseOps, + llvm::SmallVectorImpl &iv, + llvm::SmallVectorImpl &reductionTypes, + llvm::SmallVectorImpl &reductionSyms) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + ClauseProcessor bcp(converter, semaCtx, beginClauses); + bcp.processCollapse(loc, eval, clauseOps, iv); + bcp.processOrdered(clauseOps); + bcp.processReduction(loc, clauseOps, &reductionTypes, &reductionSyms); + bcp.processSchedule(stmtCtx, clauseOps); + clauseOps.loopInclusiveAttr = firOpBuilder.getUnitAttr(); + // TODO Support delayed privatization. - // Iterate over our use_device_ptr list and shift all non-cptr arguments into - // use_device_addr. - for (auto *it = clauseOps.useDevicePtrVars.begin(); - it != clauseOps.useDevicePtrVars.end();) { - if (!fir::isa_builtin_cptr_type(fir::unwrapRefType(it->getType()))) { - clauseOps.useDeviceAddrVars.push_back(*it); - // We have to shuffle the symbols around as well, to maintain - // the correct Input -> BlockArg for use_device_ptr/use_device_addr. - // NOTE: However, as map's do not seem to be included currently - // this isn't as pertinent, but we must try to maintain for - // future alterations. I believe the reason they are not currently - // is that the BlockArg assign/lowering needs to be extended - // to a greater set of types. - auto idx = std::distance(clauseOps.useDevicePtrVars.begin(), it); - moveElementToBack(idx, useDeviceTypes); - moveElementToBack(idx, useDeviceLocs); - moveElementToBack(idx, useDeviceSymbols); - it = clauseOps.useDevicePtrVars.erase(it); - continue; + if (ReductionProcessor::doReductionByRef(clauseOps.reductionVars)) + clauseOps.reductionByRefAttr = firOpBuilder.getUnitAttr(); + + if (endClauses) { + ClauseProcessor ecp(converter, semaCtx, *endClauses); + ecp.processNowait(clauseOps); + } + + bcp.processTODO( + loc, llvm::omp::Directive::OMPD_do); +} + +//===----------------------------------------------------------------------===// +// Code generation functions for leaf constructs +//===----------------------------------------------------------------------===// + +static mlir::omp::BarrierOp +genBarrierOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, mlir::Location loc) { + return converter.getFirOpBuilder().create(loc); +} + +static mlir::omp::CriticalOp +genCriticalOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool genNested, + mlir::Location loc, + const Fortran::parser::OmpClauseList &clauseList, + const std::optional &name) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + mlir::FlatSymbolRefAttr nameAttr; + + if (name) { + std::string nameStr = name->ToString(); + mlir::ModuleOp mod = firOpBuilder.getModule(); + auto global = mod.lookupSymbol(nameStr); + if (!global) { + mlir::omp::CriticalClauseOps clauseOps; + genCriticalDeclareClauses(converter, semaCtx, clauseList, loc, clauseOps, + nameStr); + + mlir::OpBuilder modBuilder(mod.getBodyRegion()); + global = modBuilder.create(loc, clauseOps); } - ++it; + nameAttr = mlir::FlatSymbolRefAttr::get(firOpBuilder.getContext(), + global.getSymName()); } + + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, loc, eval).setGenNested(genNested), + nameAttr); } -static mlir::omp::TargetDataOp -genTargetDataOp(Fortran::lower::AbstractConverter &converter, +static mlir::omp::DistributeOp +genDistributeOp(Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, bool genNested, - mlir::Location currentLocation, + mlir::Location loc, const Fortran::parser::OmpClauseList &clauseList) { - Fortran::lower::StatementContext stmtCtx; - mlir::omp::TargetDataClauseOps clauseOps; - llvm::SmallVector useDeviceTypes; - llvm::SmallVector useDeviceLocs; - llvm::SmallVector useDeviceSyms; + TODO(loc, "Distribute construct"); + return nullptr; +} - ClauseProcessor cp(converter, semaCtx, clauseList); - cp.processIf(llvm::omp::Directive::OMPD_target_data, clauseOps); - cp.processDevice(stmtCtx, clauseOps); - cp.processUseDevicePtr(clauseOps, useDeviceTypes, useDeviceLocs, - useDeviceSyms); - cp.processUseDeviceAddr(clauseOps, useDeviceTypes, useDeviceLocs, - useDeviceSyms); +static mlir::omp::FlushOp +genFlushOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, mlir::Location loc, + const std::optional &objectList, + const std::optional> + &clauseList) { + llvm::SmallVector operandRange; + genFlushClauses(converter, semaCtx, objectList, clauseList, loc, + operandRange); + + return converter.getFirOpBuilder().create( + converter.getCurrentLocation(), operandRange); +} - // This function implements the deprecated functionality of use_device_ptr - // that allows users to provide non-CPTR arguments to it with the caveat - // that the compiler will treat them as use_device_addr. A lot of legacy - // code may still depend on this functionality, so we should support it - // in some manner. We do so currently by simply shifting non-cptr operands - // from the use_device_ptr list into the front of the use_device_addr list - // whilst maintaining the ordering of useDeviceLocs, useDeviceSymbols and - // useDeviceTypes to use_device_ptr/use_device_addr input for BlockArg - // ordering. - // TODO: Perhaps create a user provideable compiler option that will - // re-introduce a hard-error rather than a warning in these cases. - promoteNonCPtrUseDevicePtrArgsToUseDeviceAddr(clauseOps, useDeviceTypes, - useDeviceLocs, useDeviceSyms); - cp.processMap(currentLocation, llvm::omp::Directive::OMPD_target_data, - stmtCtx, clauseOps); +static mlir::omp::MasterOp +genMasterOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool genNested, + mlir::Location loc) { + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, loc, eval).setGenNested(genNested)); +} + +static mlir::omp::OrderedOp +genOrderedOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, mlir::Location loc, + const Fortran::parser::OmpClauseList &clauseList) { + TODO(loc, "OMPD_ordered"); + return nullptr; +} + +static mlir::omp::OrderedRegionOp +genOrderedRegionOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool genNested, + mlir::Location loc, + const Fortran::parser::OmpClauseList &clauseList) { + mlir::omp::OrderedRegionClauseOps clauseOps; + genOrderedRegionClauses(converter, semaCtx, clauseList, loc, clauseOps); + + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, loc, eval).setGenNested(genNested), + clauseOps); +} + +static mlir::omp::ParallelOp +genParallelOp(Fortran::lower::AbstractConverter &converter, + Fortran::lower::SymMap &symTable, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool genNested, + mlir::Location loc, + const Fortran::parser::OmpClauseList &clauseList, + bool outerCombined = false) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + Fortran::lower::StatementContext stmtCtx; + mlir::omp::ParallelClauseOps clauseOps; + llvm::SmallVector privateSyms; + llvm::SmallVector reductionTypes; + llvm::SmallVector reductionSyms; + genParallelClauses(converter, semaCtx, stmtCtx, clauseList, loc, + /*processReduction=*/!outerCombined, clauseOps, + reductionTypes, reductionSyms); - auto dataOp = converter.getFirOpBuilder().create( - currentLocation, clauseOps); + auto reductionCallback = [&](mlir::Operation *op) { + genReductionVars(op, converter, loc, reductionSyms, reductionTypes); + return reductionSyms; + }; - genBodyOfTargetDataOp(converter, semaCtx, eval, genNested, dataOp, - useDeviceTypes, useDeviceLocs, useDeviceSyms, - currentLocation); - return dataOp; -} + OpWithBodyGenInfo genInfo = + OpWithBodyGenInfo(converter, semaCtx, loc, eval) + .setGenNested(genNested) + .setOuterCombined(outerCombined) + .setClauses(&clauseList) + .setReductions(&reductionSyms, &reductionTypes) + .setGenRegionEntryCb(reductionCallback); -template -static OpTy genTargetEnterExitDataUpdateOp( - Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - mlir::Location currentLocation, - const Fortran::parser::OmpClauseList &clauseList) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - Fortran::lower::StatementContext stmtCtx; - mlir::omp::TargetEnterExitUpdateDataClauseOps clauseOps; + if (!enableDelayedPrivatization) + return genOpWithBody(genInfo, clauseOps); - // GCC 9.3.0 emits a (probably) bogus warning about an unused variable. - [[maybe_unused]] llvm::omp::Directive directive; - if constexpr (std::is_same_v) { - directive = llvm::omp::Directive::OMPD_target_enter_data; - } else if constexpr (std::is_same_v) { - directive = llvm::omp::Directive::OMPD_target_exit_data; - } else if constexpr (std::is_same_v) { - directive = llvm::omp::Directive::OMPD_target_update; - } else { - return nullptr; - } + bool privatize = !outerCombined; + DataSharingProcessor dsp(converter, semaCtx, clauseList, eval, + /*useDelayedPrivatization=*/true, &symTable); - ClauseProcessor cp(converter, semaCtx, clauseList); - cp.processIf(directive, clauseOps); - cp.processDevice(stmtCtx, clauseOps); - cp.processDepend(clauseOps); - cp.processNowait(clauseOps); + if (privatize) + dsp.processStep1(&clauseOps, &privateSyms); - if constexpr (std::is_same_v) { - cp.processMotionClauses(stmtCtx, clauseOps); - cp.processMotionClauses(stmtCtx, clauseOps); - } else { - cp.processMap(currentLocation, directive, stmtCtx, clauseOps); - } + auto genRegionEntryCB = [&](mlir::Operation *op) { + auto parallelOp = llvm::cast(op); - return firOpBuilder.create(currentLocation, clauseOps); -} + llvm::SmallVector reductionLocs( + clauseOps.reductionVars.size(), loc); -// This functions creates a block for the body of the targetOp's region. It adds -// all the symbols present in mapSymbols as block arguments to this block. -static void -genBodyOfTargetOp(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, bool genNested, - mlir::omp::TargetOp &targetOp, - llvm::ArrayRef mapSyms, - llvm::ArrayRef mapSymLocs, - llvm::ArrayRef mapSymTypes, - const mlir::Location ¤tLocation) { - assert(mapSymTypes.size() == mapSymLocs.size()); + mlir::OperandRange privateVars = parallelOp.getPrivateVars(); + mlir::Region ®ion = parallelOp.getRegion(); - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - mlir::Region ®ion = targetOp.getRegion(); + llvm::SmallVector privateVarTypes = reductionTypes; + privateVarTypes.reserve(privateVarTypes.size() + privateVars.size()); + llvm::transform(privateVars, std::back_inserter(privateVarTypes), + [](mlir::Value v) { return v.getType(); }); - auto *regionBlock = - firOpBuilder.createBlock(®ion, {}, mapSymTypes, mapSymLocs); + llvm::SmallVector privateVarLocs = reductionLocs; + privateVarLocs.reserve(privateVarLocs.size() + privateVars.size()); + llvm::transform(privateVars, std::back_inserter(privateVarLocs), + [](mlir::Value v) { return v.getLoc(); }); - // Clones the `bounds` placing them inside the target region and returns them. - auto cloneBound = [&](mlir::Value bound) { - if (mlir::isMemoryEffectFree(bound.getDefiningOp())) { - mlir::Operation *clonedOp = bound.getDefiningOp()->clone(); - regionBlock->push_back(clonedOp); - return clonedOp->getResult(0); + firOpBuilder.createBlock(®ion, /*insertPt=*/{}, privateVarTypes, + privateVarLocs); + + llvm::SmallVector allSymbols = + reductionSyms; + allSymbols.append(privateSyms); + for (auto [arg, prv] : llvm::zip_equal(allSymbols, region.getArguments())) { + converter.bindSymbol(*arg, prv); } - TODO(converter.getCurrentLocation(), - "target map clause operand unsupported bound type"); - }; - auto cloneBounds = [cloneBound](llvm::ArrayRef bounds) { - llvm::SmallVector clonedBounds; - for (mlir::Value bound : bounds) - clonedBounds.emplace_back(cloneBound(bound)); - return clonedBounds; + return allSymbols; }; - // Bind the symbols to their corresponding block arguments. - for (auto [argIndex, argSymbol] : llvm::enumerate(mapSyms)) { - const mlir::BlockArgument &arg = region.getArgument(argIndex); - // Avoid capture of a reference to a structured binding. - const Fortran::semantics::Symbol *sym = argSymbol; - // Structure component symbols don't have bindings. - if (sym->owner().IsDerivedType()) - continue; - fir::ExtendedValue extVal = converter.getSymbolExtendedValue(*sym); - extVal.match( - [&](const fir::BoxValue &v) { - converter.bindSymbol(*sym, - fir::BoxValue(arg, cloneBounds(v.getLBounds()), - v.getExplicitParameters(), - v.getExplicitExtents())); - }, - [&](const fir::MutableBoxValue &v) { - converter.bindSymbol( - *sym, fir::MutableBoxValue(arg, cloneBounds(v.getLBounds()), - v.getMutableProperties())); - }, - [&](const fir::ArrayBoxValue &v) { - converter.bindSymbol( - *sym, fir::ArrayBoxValue(arg, cloneBounds(v.getExtents()), - cloneBounds(v.getLBounds()), - v.getSourceBox())); - }, - [&](const fir::CharArrayBoxValue &v) { - converter.bindSymbol( - *sym, fir::CharArrayBoxValue(arg, cloneBound(v.getLen()), - cloneBounds(v.getExtents()), - cloneBounds(v.getLBounds()))); - }, - [&](const fir::CharBoxValue &v) { - converter.bindSymbol(*sym, - fir::CharBoxValue(arg, cloneBound(v.getLen()))); - }, - [&](const fir::UnboxedValue &v) { converter.bindSymbol(*sym, arg); }, - [&](const auto &) { - TODO(converter.getCurrentLocation(), - "target map clause operand unsupported type"); - }); - } + // TODO Merge with the reduction CB. + genInfo.setGenRegionEntryCb(genRegionEntryCB).setDataSharingProcessor(&dsp); + return genOpWithBody(genInfo, clauseOps); +} - // Check if cloning the bounds introduced any dependency on the outer region. - // If so, then either clone them as well if they are MemoryEffectFree, or else - // copy them to a new temporary and add them to the map and block_argument - // lists and replace their uses with the new temporary. - llvm::SetVector valuesDefinedAbove; - mlir::getUsedValuesDefinedAbove(region, valuesDefinedAbove); - while (!valuesDefinedAbove.empty()) { - for (mlir::Value val : valuesDefinedAbove) { - mlir::Operation *valOp = val.getDefiningOp(); - if (mlir::isMemoryEffectFree(valOp)) { - mlir::Operation *clonedOp = valOp->clone(); - regionBlock->push_front(clonedOp); - val.replaceUsesWithIf( - clonedOp->getResult(0), [regionBlock](mlir::OpOperand &use) { - return use.getOwner()->getBlock() == regionBlock; - }); - } else { - auto savedIP = firOpBuilder.getInsertionPoint(); - firOpBuilder.setInsertionPointAfter(valOp); - auto copyVal = - firOpBuilder.createTemporary(val.getLoc(), val.getType()); - firOpBuilder.createStoreWithConvert(copyVal.getLoc(), val, copyVal); +static mlir::omp::SectionOp +genSectionOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool genNested, + mlir::Location loc, + const Fortran::parser::OmpClauseList &clauseList) { + // Currently only private/firstprivate clause is handled, and + // all privatization is done within `omp.section` operations. + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, loc, eval) + .setGenNested(genNested) + .setClauses(&clauseList)); +} - llvm::SmallVector bounds; - std::stringstream name; - firOpBuilder.setInsertionPoint(targetOp); - mlir::Value mapOp = createMapInfoOp( - firOpBuilder, copyVal.getLoc(), copyVal, mlir::Value{}, name.str(), - bounds, llvm::SmallVector{}, - static_cast< - std::underlying_type_t>( - llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT), - mlir::omp::VariableCaptureKind::ByCopy, copyVal.getType()); - targetOp.getMapOperandsMutable().append(mapOp); - mlir::Value clonedValArg = - region.addArgument(copyVal.getType(), copyVal.getLoc()); - firOpBuilder.setInsertionPointToStart(regionBlock); - auto loadOp = firOpBuilder.create(clonedValArg.getLoc(), - clonedValArg); - val.replaceUsesWithIf( - loadOp->getResult(0), [regionBlock](mlir::OpOperand &use) { - return use.getOwner()->getBlock() == regionBlock; - }); - firOpBuilder.setInsertionPoint(regionBlock, savedIP); - } - } - valuesDefinedAbove.clear(); - mlir::getUsedValuesDefinedAbove(region, valuesDefinedAbove); - } +static mlir::omp::SectionsOp +genSectionsOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, mlir::Location loc, + const mlir::omp::SectionsClauseOps &clauseOps) { + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, loc, eval).setGenNested(false), + clauseOps); +} - // Insert dummy instruction to remember the insertion position. The - // marker will be deleted since there are not uses. - // In the HLFIR flow there are hlfir.declares inserted above while - // setting block arguments. - mlir::Value undefMarker = firOpBuilder.create( - targetOp.getOperation()->getLoc(), firOpBuilder.getIndexType()); +static mlir::omp::SimdLoopOp +genSimdLoopOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, mlir::Location loc, + const Fortran::parser::OmpClauseList &clauseList) { + DataSharingProcessor dsp(converter, semaCtx, clauseList, eval); + dsp.processStep1(); - // Create blocks for unstructured regions. This has to be done since - // blocks are initially allocated with the function as the parent region. - if (eval.lowerAsUnstructured()) { - Fortran::lower::createEmptyRegionBlocks( - firOpBuilder, eval.getNestedEvaluations()); - } + Fortran::lower::StatementContext stmtCtx; + mlir::omp::SimdLoopClauseOps clauseOps; + llvm::SmallVector iv; + genSimdLoopClauses(converter, semaCtx, stmtCtx, eval, clauseList, loc, + clauseOps, iv); - firOpBuilder.create(currentLocation); + auto *nestedEval = + getCollapsedLoopEval(eval, Fortran::lower::getCollapseValue(clauseList)); - // Create the insertion point after the marker. - firOpBuilder.setInsertionPointAfter(undefMarker.getDefiningOp()); - if (genNested) - genNestedEvaluations(converter, eval); + auto ivCallback = [&](mlir::Operation *op) { + return genLoopVars(op, converter, loc, iv); + }; + + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, loc, *nestedEval) + .setClauses(&clauseList) + .setDataSharingProcessor(&dsp) + .setGenRegionEntryCb(ivCallback), + clauseOps); +} + +static mlir::omp::SingleOp +genSingleOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool genNested, + mlir::Location loc, + const Fortran::parser::OmpClauseList &beginClauseList, + const Fortran::parser::OmpClauseList &endClauseList) { + mlir::omp::SingleClauseOps clauseOps; + genSingleClauses(converter, semaCtx, beginClauseList, endClauseList, loc, + clauseOps); + + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, loc, eval) + .setGenNested(genNested) + .setClauses(&beginClauseList), + clauseOps); } static mlir::omp::TargetOp genTargetOp(Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, bool genNested, - mlir::Location currentLocation, + mlir::Location loc, const Fortran::parser::OmpClauseList &clauseList, - llvm::omp::Directive directive, bool outerCombined = false) { + bool outerCombined = false) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); Fortran::lower::StatementContext stmtCtx; + + bool processHostOnlyClauses = + !llvm::cast(*converter.getModuleOp()) + .getIsTargetDevice(); + mlir::omp::TargetClauseOps clauseOps; - llvm::SmallVector mapTypes, devicePtrTypes, deviceAddrTypes; - llvm::SmallVector mapLocs, devicePtrLocs, deviceAddrLocs; llvm::SmallVector mapSyms, devicePtrSyms, deviceAddrSyms; - - ClauseProcessor cp(converter, semaCtx, clauseList); - cp.processIf(llvm::omp::Directive::OMPD_target, clauseOps); - cp.processDevice(stmtCtx, clauseOps); - cp.processThreadLimit(stmtCtx, clauseOps); - cp.processDepend(clauseOps); - cp.processNowait(clauseOps); - cp.processMap(currentLocation, directive, stmtCtx, clauseOps, &mapSyms, - &mapLocs, &mapTypes); - cp.processIsDevicePtr(clauseOps, devicePtrTypes, devicePtrLocs, - devicePtrSyms); - cp.processHasDeviceAddr(clauseOps, deviceAddrTypes, deviceAddrLocs, - deviceAddrSyms); - // TODO Support delayed privatization. - - cp.processTODO(currentLocation, - llvm::omp::Directive::OMPD_target); + llvm::SmallVector mapLocs, devicePtrLocs, deviceAddrLocs; + llvm::SmallVector mapTypes, devicePtrTypes, deviceAddrTypes; + genTargetClauses(converter, semaCtx, stmtCtx, clauseList, loc, + processHostOnlyClauses, /*processReduction=*/outerCombined, + clauseOps, mapSyms, mapLocs, mapTypes, deviceAddrSyms, + deviceAddrLocs, deviceAddrTypes, devicePtrSyms, + devicePtrLocs, devicePtrTypes); // 5.8.1 Implicit Data-Mapping Attribute Rules // The following code follows the implicit data-mapping rules to map all the @@ -1056,22 +1515,21 @@ genTargetOp(Fortran::lower::AbstractConverter &converter, fir::ExtendedValue dataExv = converter.getSymbolExtendedValue(sym); name << sym.name().ToString(); - Fortran::lower::AddrAndBoundsInfo info = - getDataOperandBaseAddr(converter, converter.getFirOpBuilder(), sym, - converter.getCurrentLocation()); + Fortran::lower::AddrAndBoundsInfo info = getDataOperandBaseAddr( + converter, firOpBuilder, sym, converter.getCurrentLocation()); if (fir::unwrapRefType(info.addr.getType()).isa()) bounds = Fortran::lower::genBoundsOpsFromBox( - converter.getFirOpBuilder(), converter.getCurrentLocation(), - converter, dataExv, info); + firOpBuilder, converter.getCurrentLocation(), converter, + dataExv, info); if (fir::unwrapRefType(info.addr.getType()).isa()) { bool dataExvIsAssumedSize = Fortran::semantics::IsAssumedSizeArray(sym.GetUltimate()); bounds = Fortran::lower::genBaseBoundsOps( - converter.getFirOpBuilder(), converter.getCurrentLocation(), - converter, dataExv, dataExvIsAssumedSize); + firOpBuilder, converter.getCurrentLocation(), converter, dataExv, + dataExvIsAssumedSize); } llvm::omp::OpenMPOffloadMappingFlags mapFlag = @@ -1085,7 +1543,7 @@ genTargetOp(Fortran::lower::AbstractConverter &converter, // If a variable is specified in declare target link and if device // type is not specified as `nohost`, it needs to be mapped tofrom - mlir::ModuleOp mod = converter.getFirOpBuilder().getModule(); + mlir::ModuleOp mod = firOpBuilder.getModule(); mlir::Operation *op = mod.lookupSymbol(converter.mangleName(sym)); auto declareTargetOp = llvm::dyn_cast_if_present(op); @@ -1105,8 +1563,8 @@ genTargetOp(Fortran::lower::AbstractConverter &converter, } mlir::Value mapOp = createMapInfoOp( - converter.getFirOpBuilder(), baseOp.getLoc(), baseOp, mlir::Value{}, - name.str(), bounds, {}, + firOpBuilder, baseOp.getLoc(), baseOp, mlir::Value{}, name.str(), + bounds, {}, static_cast< std::underlying_type_t>( mapFlag), @@ -1118,341 +1576,147 @@ genTargetOp(Fortran::lower::AbstractConverter &converter, mapTypes.push_back(baseOp.getType()); } } - }; - Fortran::lower::pft::visitAllSymbols(eval, captureImplicitMap); - - auto targetOp = converter.getFirOpBuilder().create( - currentLocation, clauseOps); - - genBodyOfTargetOp(converter, semaCtx, eval, genNested, targetOp, mapSyms, - mapLocs, mapTypes, currentLocation); - - return targetOp; -} - -static mlir::omp::TeamsOp -genTeamsOp(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, bool genNested, - mlir::Location currentLocation, - const Fortran::parser::OmpClauseList &clauseList, - bool outerCombined = false) { - Fortran::lower::StatementContext stmtCtx; - mlir::omp::TeamsClauseOps clauseOps; - - ClauseProcessor cp(converter, semaCtx, clauseList); - cp.processIf(llvm::omp::Directive::OMPD_teams, clauseOps); - cp.processAllocate(clauseOps); - cp.processDefault(); - cp.processNumTeams(stmtCtx, clauseOps); - cp.processThreadLimit(stmtCtx, clauseOps); - // TODO Support delayed privatization. - - cp.processTODO(currentLocation, - llvm::omp::Directive::OMPD_teams); - - return genOpWithBody( - OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) - .setGenNested(genNested) - .setOuterCombined(outerCombined) - .setClauses(&clauseList), - clauseOps); -} - -/// Extract the list of function and variable symbols affected by the given -/// 'declare target' directive and return the intended device type for them. -static void getDeclareTargetInfo( - Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, - const Fortran::parser::OpenMPDeclareTargetConstruct &declareTargetConstruct, - mlir::omp::DeclareTargetClauseOps &clauseOps, - llvm::SmallVectorImpl &symbolAndClause) { - const auto &spec = std::get( - declareTargetConstruct.t); - if (const auto *objectList{ - Fortran::parser::Unwrap(spec.u)}) { - ObjectList objects{makeObjects(*objectList, semaCtx)}; - // Case: declare target(func, var1, var2) - gatherFuncAndVarSyms(objects, mlir::omp::DeclareTargetCaptureClause::to, - symbolAndClause); - } else if (const auto *clauseList{ - Fortran::parser::Unwrap( - spec.u)}) { - if (clauseList->v.empty()) { - // Case: declare target, implicit capture of function - symbolAndClause.emplace_back( - mlir::omp::DeclareTargetCaptureClause::to, - eval.getOwningProcedure()->getSubprogramSymbol()); - } - - ClauseProcessor cp(converter, semaCtx, *clauseList); - cp.processTo(symbolAndClause); - cp.processEnter(symbolAndClause); - cp.processLink(symbolAndClause); - cp.processDeviceType(clauseOps); - cp.processTODO(converter.getCurrentLocation(), - llvm::omp::Directive::OMPD_declare_target); - } -} - -static void collectDeferredDeclareTargets( - Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, - const Fortran::parser::OpenMPDeclareTargetConstruct &declareTargetConstruct, - llvm::SmallVectorImpl - &deferredDeclareTarget) { - mlir::omp::DeclareTargetClauseOps clauseOps; - llvm::SmallVector symbolAndClause; - getDeclareTargetInfo(converter, semaCtx, eval, declareTargetConstruct, - clauseOps, symbolAndClause); - // Return the device type only if at least one of the targets for the - // directive is a function or subroutine - mlir::ModuleOp mod = converter.getFirOpBuilder().getModule(); - - for (const DeclareTargetCapturePair &symClause : symbolAndClause) { - mlir::Operation *op = mod.lookupSymbol(converter.mangleName( - std::get(symClause))); - - if (!op) { - deferredDeclareTarget.push_back({std::get<0>(symClause), - clauseOps.deviceType, - std::get<1>(symClause)}); - } - } -} - -static std::optional -getDeclareTargetFunctionDevice( - Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, - const Fortran::parser::OpenMPDeclareTargetConstruct - &declareTargetConstruct) { - mlir::omp::DeclareTargetClauseOps clauseOps; - llvm::SmallVector symbolAndClause; - getDeclareTargetInfo(converter, semaCtx, eval, declareTargetConstruct, - clauseOps, symbolAndClause); - - // Return the device type only if at least one of the targets for the - // directive is a function or subroutine - mlir::ModuleOp mod = converter.getFirOpBuilder().getModule(); - for (const DeclareTargetCapturePair &symClause : symbolAndClause) { - mlir::Operation *op = mod.lookupSymbol(converter.mangleName( - std::get(symClause))); - - if (mlir::isa_and_nonnull(op)) - return clauseOps.deviceType; - } - - return std::nullopt; -} - -//===----------------------------------------------------------------------===// -// genOMP() Code generation helper functions -//===----------------------------------------------------------------------===// - -static void -genOmpSimpleStandalone(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, bool genNested, - const Fortran::parser::OpenMPSimpleStandaloneConstruct - &simpleStandaloneConstruct) { - const auto &directive = - std::get( - simpleStandaloneConstruct.t); - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - const auto &opClauseList = - std::get(simpleStandaloneConstruct.t); - mlir::Location currentLocation = converter.genLocation(directive.source); - - switch (directive.v) { - default: - break; - case llvm::omp::Directive::OMPD_barrier: - firOpBuilder.create(currentLocation); - break; - case llvm::omp::Directive::OMPD_taskwait: { - mlir::omp::TaskwaitClauseOps clauseOps; - ClauseProcessor cp(converter, semaCtx, opClauseList); - cp.processTODO( - currentLocation, llvm::omp::Directive::OMPD_taskwait); - firOpBuilder.create(currentLocation, clauseOps); - break; - } - case llvm::omp::Directive::OMPD_taskyield: - firOpBuilder.create(currentLocation); - break; - case llvm::omp::Directive::OMPD_target_data: - genTargetDataOp(converter, semaCtx, eval, genNested, currentLocation, - opClauseList); - break; - case llvm::omp::Directive::OMPD_target_enter_data: - genTargetEnterExitDataUpdateOp( - converter, semaCtx, currentLocation, opClauseList); - break; - case llvm::omp::Directive::OMPD_target_exit_data: - genTargetEnterExitDataUpdateOp( - converter, semaCtx, currentLocation, opClauseList); - break; - case llvm::omp::Directive::OMPD_target_update: - genTargetEnterExitDataUpdateOp( - converter, semaCtx, currentLocation, opClauseList); - break; - case llvm::omp::Directive::OMPD_ordered: - TODO(currentLocation, "OMPD_ordered"); - } -} - -static void -genOmpFlush(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, - const Fortran::parser::OpenMPFlushConstruct &flushConstruct) { - llvm::SmallVector operandRange; - if (const auto &ompObjectList = - std::get>( - flushConstruct.t)) - genObjectList2(*ompObjectList, converter, operandRange); - const auto &memOrderClause = - std::get>>( - flushConstruct.t); - if (memOrderClause && memOrderClause->size() > 0) - TODO(converter.getCurrentLocation(), "Handle OmpMemoryOrderClause"); - converter.getFirOpBuilder().create( - converter.getCurrentLocation(), operandRange); -} - -static llvm::SmallVector -genLoopVars(mlir::Operation *op, Fortran::lower::AbstractConverter &converter, - mlir::Location &loc, - llvm::ArrayRef args) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - auto ®ion = op->getRegion(0); - - std::size_t loopVarTypeSize = 0; - for (const Fortran::semantics::Symbol *arg : args) - loopVarTypeSize = std::max(loopVarTypeSize, arg->GetUltimate().size()); - mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize); - llvm::SmallVector tiv(args.size(), loopVarType); - llvm::SmallVector locs(args.size(), loc); - firOpBuilder.createBlock(®ion, {}, tiv, locs); - // The argument is not currently in memory, so make a temporary for the - // argument, and store it there, then bind that location to the argument. - mlir::Operation *storeOp = nullptr; - for (auto [argIndex, argSymbol] : llvm::enumerate(args)) { - mlir::Value indexVal = fir::getBase(region.front().getArgument(argIndex)); - storeOp = - createAndSetPrivatizedLoopVar(converter, loc, indexVal, argSymbol); - } - firOpBuilder.setInsertionPointAfter(storeOp); - - return llvm::SmallVector(args); -} - -static llvm::SmallVector -genLoopAndReductionVars( - mlir::Operation *op, Fortran::lower::AbstractConverter &converter, - mlir::Location &loc, - llvm::ArrayRef loopArgs, - llvm::ArrayRef reductionArgs, - llvm::ArrayRef reductionTypes) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - - llvm::SmallVector blockArgTypes; - llvm::SmallVector blockArgLocs; - blockArgTypes.reserve(loopArgs.size() + reductionArgs.size()); - blockArgLocs.reserve(blockArgTypes.size()); - mlir::Block *entryBlock; - - if (loopArgs.size()) { - std::size_t loopVarTypeSize = 0; - for (const Fortran::semantics::Symbol *arg : loopArgs) - loopVarTypeSize = std::max(loopVarTypeSize, arg->GetUltimate().size()); - mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize); - std::fill_n(std::back_inserter(blockArgTypes), loopArgs.size(), - loopVarType); - std::fill_n(std::back_inserter(blockArgLocs), loopArgs.size(), loc); - } - if (reductionArgs.size()) { - llvm::copy(reductionTypes, std::back_inserter(blockArgTypes)); - std::fill_n(std::back_inserter(blockArgLocs), reductionArgs.size(), loc); - } - entryBlock = firOpBuilder.createBlock(&op->getRegion(0), {}, blockArgTypes, - blockArgLocs); - // The argument is not currently in memory, so make a temporary for the - // argument, and store it there, then bind that location to the argument. - if (loopArgs.size()) { - mlir::Operation *storeOp = nullptr; - for (auto [argIndex, argSymbol] : llvm::enumerate(loopArgs)) { - mlir::Value indexVal = - fir::getBase(op->getRegion(0).front().getArgument(argIndex)); - storeOp = - createAndSetPrivatizedLoopVar(converter, loc, indexVal, argSymbol); - } - firOpBuilder.setInsertionPointAfter(storeOp); - } - // Bind the reduction arguments to their block arguments - for (auto [arg, prv] : llvm::zip_equal( - reductionArgs, - llvm::drop_begin(entryBlock->getArguments(), loopArgs.size()))) { - converter.bindSymbol(*arg, prv); - } + }; + Fortran::lower::pft::visitAllSymbols(eval, captureImplicitMap); - return llvm::SmallVector(loopArgs); + auto targetOp = firOpBuilder.create(loc, clauseOps); + genBodyOfTargetOp(converter, semaCtx, eval, genNested, targetOp, mapSyms, + mapLocs, mapTypes, loc); + return targetOp; } -static void -createSimdLoop(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, - llvm::omp::Directive ompDirective, - const Fortran::parser::OmpClauseList &loopOpClauseList, - mlir::Location loc) { +static mlir::omp::TargetDataOp +genTargetDataOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool genNested, + mlir::Location loc, + const Fortran::parser::OmpClauseList &clauseList) { + Fortran::lower::StatementContext stmtCtx; + mlir::omp::TargetDataClauseOps clauseOps; + llvm::SmallVector useDeviceTypes; + llvm::SmallVector useDeviceLocs; + llvm::SmallVector useDeviceSyms; + genTargetDataClauses(converter, semaCtx, stmtCtx, clauseList, loc, clauseOps, + useDeviceTypes, useDeviceLocs, useDeviceSyms); + + auto targetDataOp = + converter.getFirOpBuilder().create(loc, + clauseOps); + genBodyOfTargetDataOp(converter, semaCtx, eval, genNested, targetDataOp, + useDeviceTypes, useDeviceLocs, useDeviceSyms, loc); + return targetDataOp; +} + +template +static OpTy genTargetEnterExitUpdateDataOp( + Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, mlir::Location loc, + const Fortran::parser::OmpClauseList &clauseList) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - DataSharingProcessor dsp(converter, semaCtx, loopOpClauseList, eval); - dsp.processStep1(); + Fortran::lower::StatementContext stmtCtx; + + // GCC 9.3.0 emits a (probably) bogus warning about an unused variable. + [[maybe_unused]] llvm::omp::Directive directive; + if constexpr (std::is_same_v) { + directive = llvm::omp::Directive::OMPD_target_enter_data; + } else if constexpr (std::is_same_v) { + directive = llvm::omp::Directive::OMPD_target_exit_data; + } else if constexpr (std::is_same_v) { + directive = llvm::omp::Directive::OMPD_target_update; + } else { + llvm_unreachable("Unexpected TARGET DATA construct"); + } + + mlir::omp::TargetEnterExitUpdateDataClauseOps clauseOps; + genTargetEnterExitUpdateDataClauses(converter, semaCtx, stmtCtx, clauseList, + loc, directive, clauseOps); + + return firOpBuilder.create(loc, clauseOps); +} +static mlir::omp::TaskOp +genTaskOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool genNested, + mlir::Location loc, + const Fortran::parser::OmpClauseList &clauseList) { Fortran::lower::StatementContext stmtCtx; - mlir::omp::SimdLoopClauseOps clauseOps; - llvm::SmallVector iv; + mlir::omp::TaskClauseOps clauseOps; + genTaskClauses(converter, semaCtx, stmtCtx, clauseList, loc, clauseOps); - ClauseProcessor cp(converter, semaCtx, loopOpClauseList); - cp.processCollapse(loc, eval, clauseOps, iv); - cp.processReduction(loc, clauseOps); - cp.processIf(llvm::omp::Directive::OMPD_simd, clauseOps); - cp.processSimdlen(clauseOps); - cp.processSafelen(clauseOps); - clauseOps.loopInclusiveAttr = firOpBuilder.getUnitAttr(); - // TODO Support delayed privatization. + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, loc, eval) + .setGenNested(genNested) + .setClauses(&clauseList), + clauseOps); +} - cp.processTODO(loc, ompDirective); +static mlir::omp::TaskgroupOp +genTaskgroupOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool genNested, + mlir::Location loc, + const Fortran::parser::OmpClauseList &clauseList) { + mlir::omp::TaskgroupClauseOps clauseOps; + genTaskgroupClauses(converter, semaCtx, clauseList, loc, clauseOps); - auto *nestedEval = getCollapsedLoopEval( - eval, Fortran::lower::getCollapseValue(loopOpClauseList)); + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, loc, eval) + .setGenNested(genNested) + .setClauses(&clauseList), + clauseOps); +} - auto ivCallback = [&](mlir::Operation *op) { - return genLoopVars(op, converter, loc, iv); - }; +static mlir::omp::TaskloopOp +genTaskloopOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, mlir::Location loc, + const Fortran::parser::OmpClauseList &clauseList) { + TODO(loc, "Taskloop construct"); +} - genOpWithBody( - OpWithBodyGenInfo(converter, semaCtx, loc, *nestedEval) - .setClauses(&loopOpClauseList) - .setDataSharingProcessor(&dsp) - .setGenRegionEntryCb(ivCallback), +static mlir::omp::TaskwaitOp +genTaskwaitOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, mlir::Location loc, + const Fortran::parser::OmpClauseList &clauseList) { + mlir::omp::TaskwaitClauseOps clauseOps; + genTaskwaitClauses(converter, semaCtx, clauseList, loc, clauseOps); + return converter.getFirOpBuilder().create(loc, + clauseOps); +} + +static mlir::omp::TaskyieldOp +genTaskyieldOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, mlir::Location loc) { + return converter.getFirOpBuilder().create(loc); +} + +static mlir::omp::TeamsOp +genTeamsOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, bool genNested, + mlir::Location loc, const Fortran::parser::OmpClauseList &clauseList, + bool outerCombined = false) { + Fortran::lower::StatementContext stmtCtx; + mlir::omp::TeamsClauseOps clauseOps; + genTeamsClauses(converter, semaCtx, stmtCtx, clauseList, loc, clauseOps); + + return genOpWithBody( + OpWithBodyGenInfo(converter, semaCtx, loc, eval) + .setGenNested(genNested) + .setOuterCombined(outerCombined) + .setClauses(&clauseList), clauseOps); } -static void createWsloop(Fortran::lower::AbstractConverter &converter, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, - llvm::omp::Directive ompDirective, - const Fortran::parser::OmpClauseList &beginClauseList, - const Fortran::parser::OmpClauseList *endClauseList, - mlir::Location loc) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); +static mlir::omp::WsloopOp +genWsloopOp(Fortran::lower::AbstractConverter &converter, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, mlir::Location loc, + const Fortran::parser::OmpClauseList &beginClauseList, + const Fortran::parser::OmpClauseList *endClauseList) { DataSharingProcessor dsp(converter, semaCtx, beginClauseList, eval); dsp.processStep1(); @@ -1461,30 +1725,9 @@ static void createWsloop(Fortran::lower::AbstractConverter &converter, llvm::SmallVector iv; llvm::SmallVector reductionTypes; llvm::SmallVector reductionSyms; - - ClauseProcessor cp(converter, semaCtx, beginClauseList); - cp.processCollapse(loc, eval, clauseOps, iv); - cp.processSchedule(stmtCtx, clauseOps); - cp.processReduction(loc, clauseOps, &reductionTypes, &reductionSyms); - cp.processOrdered(clauseOps); - clauseOps.loopInclusiveAttr = firOpBuilder.getUnitAttr(); - // TODO Support delayed privatization. - - if (ReductionProcessor::doReductionByRef(clauseOps.reductionVars)) - clauseOps.reductionByRefAttr = firOpBuilder.getUnitAttr(); - - cp.processTODO(loc, - ompDirective); - - // In FORTRAN `nowait` clause occur at the end of `omp do` directive. - // i.e - // !$omp do - // <...> - // !$omp end do nowait - if (endClauseList) { - ClauseProcessor ecp(converter, semaCtx, *endClauseList); - ecp.processNowait(clauseOps); - } + genWsloopClauses(converter, semaCtx, stmtCtx, eval, beginClauseList, + endClauseList, loc, clauseOps, iv, reductionTypes, + reductionSyms); auto *nestedEval = getCollapsedLoopEval( eval, Fortran::lower::getCollapseValue(beginClauseList)); @@ -1494,7 +1737,7 @@ static void createWsloop(Fortran::lower::AbstractConverter &converter, reductionTypes); }; - genOpWithBody( + return genOpWithBody( OpWithBodyGenInfo(converter, semaCtx, loc, *nestedEval) .setClauses(&beginClauseList) .setDataSharingProcessor(&dsp) @@ -1503,7 +1746,11 @@ static void createWsloop(Fortran::lower::AbstractConverter &converter, clauseOps); } -static void createSimdWsloop( +//===----------------------------------------------------------------------===// +// Code generation functions for composite constructs +//===----------------------------------------------------------------------===// + +static void genCompositeDoSimd( Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, llvm::omp::Directive ompDirective, @@ -1511,7 +1758,7 @@ static void createSimdWsloop( const Fortran::parser::OmpClauseList *endClauseList, mlir::Location loc) { ClauseProcessor cp(converter, semaCtx, beginClauseList); cp.processTODO(loc, + clause::Order, clause::Safelen, clause::Simdlen>(loc, ompDirective); // TODO: Add support for vectorization - add vectorization hints inside loop // body. @@ -1521,34 +1768,7 @@ static void createSimdWsloop( // When support for vectorization is enabled, then we need to add handling of // if clause. Currently if clause can be skipped because we always assume // SIMD length = 1. - createWsloop(converter, semaCtx, eval, ompDirective, beginClauseList, - endClauseList, loc); -} - -static void -markDeclareTarget(mlir::Operation *op, - Fortran::lower::AbstractConverter &converter, - mlir::omp::DeclareTargetCaptureClause captureClause, - mlir::omp::DeclareTargetDeviceType deviceType) { - // TODO: Add support for program local variables with declare target applied - auto declareTargetOp = llvm::dyn_cast(op); - if (!declareTargetOp) - fir::emitFatalError( - converter.getCurrentLocation(), - "Attempt to apply declare target on unsupported operation"); - - // The function or global already has a declare target applied to it, very - // likely through implicit capture (usage in another declare target - // function/subroutine). It should be marked as any if it has been assigned - // both host and nohost, else we skip, as there is no change - if (declareTargetOp.isDeclareTarget()) { - if (declareTargetOp.getDeclareTargetDeviceType() != deviceType) - declareTargetOp.setDeclareTarget(mlir::omp::DeclareTargetDeviceType::any, - captureClause); - return; - } - - declareTargetOp.setDeclareTarget(deviceType, captureClause); + genWsloopOp(converter, semaCtx, eval, loc, beginClauseList, endClauseList); } //===----------------------------------------------------------------------===// @@ -1643,6 +1863,102 @@ genOMP(Fortran::lower::AbstractConverter &converter, ompDeclConstruct.u); } +//===----------------------------------------------------------------------===// +// OpenMPStandaloneConstruct visitors +//===----------------------------------------------------------------------===// + +static void genOMP(Fortran::lower::AbstractConverter &converter, + Fortran::lower::SymMap &symTable, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OpenMPSimpleStandaloneConstruct + &simpleStandaloneConstruct) { + const auto &directive = + std::get( + simpleStandaloneConstruct.t); + const auto &clauseList = + std::get(simpleStandaloneConstruct.t); + mlir::Location currentLocation = converter.genLocation(directive.source); + + switch (directive.v) { + default: + break; + case llvm::omp::Directive::OMPD_barrier: + genBarrierOp(converter, semaCtx, eval, currentLocation); + break; + case llvm::omp::Directive::OMPD_taskwait: + genTaskwaitOp(converter, semaCtx, eval, currentLocation, clauseList); + break; + case llvm::omp::Directive::OMPD_taskyield: + genTaskyieldOp(converter, semaCtx, eval, currentLocation); + break; + case llvm::omp::Directive::OMPD_target_data: + genTargetDataOp(converter, semaCtx, eval, /*genNested=*/true, + currentLocation, clauseList); + break; + case llvm::omp::Directive::OMPD_target_enter_data: + genTargetEnterExitUpdateDataOp( + converter, semaCtx, currentLocation, clauseList); + break; + case llvm::omp::Directive::OMPD_target_exit_data: + genTargetEnterExitUpdateDataOp( + converter, semaCtx, currentLocation, clauseList); + break; + case llvm::omp::Directive::OMPD_target_update: + genTargetEnterExitUpdateDataOp( + converter, semaCtx, currentLocation, clauseList); + break; + case llvm::omp::Directive::OMPD_ordered: + genOrderedOp(converter, semaCtx, eval, currentLocation, clauseList); + break; + } +} + +static void +genOMP(Fortran::lower::AbstractConverter &converter, + Fortran::lower::SymMap &symTable, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OpenMPFlushConstruct &flushConstruct) { + const auto &verbatim = std::get(flushConstruct.t); + const auto &objectList = + std::get>(flushConstruct.t); + const auto &clauseList = + std::get>>( + flushConstruct.t); + mlir::Location currentLocation = converter.genLocation(verbatim.source); + genFlushOp(converter, semaCtx, eval, currentLocation, objectList, clauseList); +} + +static void +genOMP(Fortran::lower::AbstractConverter &converter, + Fortran::lower::SymMap &symTable, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OpenMPCancelConstruct &cancelConstruct) { + TODO(converter.getCurrentLocation(), "OpenMPCancelConstruct"); +} + +static void genOMP(Fortran::lower::AbstractConverter &converter, + Fortran::lower::SymMap &symTable, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OpenMPCancellationPointConstruct + &cancellationPointConstruct) { + TODO(converter.getCurrentLocation(), "OpenMPCancelConstruct"); +} + +static void +genOMP(Fortran::lower::AbstractConverter &converter, + Fortran::lower::SymMap &symTable, + Fortran::semantics::SemanticsContext &semaCtx, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OpenMPStandaloneConstruct &standaloneConstruct) { + std::visit( + [&](auto &&s) { return genOMP(converter, symTable, semaCtx, eval, s); }, + standaloneConstruct.u); +} + //===----------------------------------------------------------------------===// // OpenMPConstruct visitors //===----------------------------------------------------------------------===// @@ -1774,7 +2090,7 @@ genOMP(Fortran::lower::AbstractConverter &converter, break; case llvm::omp::Directive::OMPD_target: genTargetOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation, - beginClauseList, directive.v); + beginClauseList); break; case llvm::omp::Directive::OMPD_target_data: genTargetDataOp(converter, semaCtx, eval, /*genNested=*/true, @@ -1790,8 +2106,7 @@ genOMP(Fortran::lower::AbstractConverter &converter, break; case llvm::omp::Directive::OMPD_teams: genTeamsOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation, - beginClauseList, - /*outerCombined=*/false); + beginClauseList); break; case llvm::omp::Directive::OMPD_workshare: // FIXME: Workshare is not a commonly used OpenMP construct, an @@ -1813,8 +2128,7 @@ genOMP(Fortran::lower::AbstractConverter &converter, if ((llvm::omp::allTargetSet & llvm::omp::blockConstructSet) .test(directive.v)) { genTargetOp(converter, semaCtx, eval, /*genNested=*/false, currentLocation, - beginClauseList, directive.v, - /*outerCombined=*/true); + beginClauseList, /*outerCombined=*/true); combinedDirective = true; } if ((llvm::omp::allTeamsSet & llvm::omp::blockConstructSet) @@ -1851,44 +2165,13 @@ genOMP(Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, const Fortran::parser::OpenMPCriticalConstruct &criticalConstruct) { - fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); - mlir::Location currentLocation = converter.getCurrentLocation(); - std::string name; - const Fortran::parser::OmpCriticalDirective &cd = + const auto &cd = std::get(criticalConstruct.t); - if (std::get>(cd.t).has_value()) { - name = - std::get>(cd.t).value().ToString(); - } - - mlir::omp::CriticalOp criticalOp = [&]() { - if (name.empty()) { - return firOpBuilder.create( - currentLocation, mlir::FlatSymbolRefAttr()); - } - - mlir::ModuleOp module = firOpBuilder.getModule(); - mlir::OpBuilder modBuilder(module.getBodyRegion()); - auto global = module.lookupSymbol(name); - if (!global) { - mlir::omp::CriticalClauseOps clauseOps; - const auto &clauseList = std::get(cd.t); - - ClauseProcessor cp(converter, semaCtx, clauseList); - cp.processHint(clauseOps); - clauseOps.nameAttr = - mlir::StringAttr::get(firOpBuilder.getContext(), name); - - global = modBuilder.create(currentLocation, - clauseOps); - } - - return firOpBuilder.create( - currentLocation, mlir::FlatSymbolRefAttr::get(firOpBuilder.getContext(), - global.getSymName())); - }(); - auto genInfo = OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval); - createBodyOfOp(criticalOp, genInfo); + const auto &clauseList = std::get(cd.t); + const auto &name = std::get>(cd.t); + mlir::Location currentLocation = converter.getCurrentLocation(); + genCriticalOp(converter, semaCtx, eval, /*genNested=*/true, currentLocation, + clauseList, name); } static void @@ -1907,7 +2190,7 @@ static void genOMP(Fortran::lower::AbstractConverter &converter, const Fortran::parser::OpenMPLoopConstruct &loopConstruct) { const auto &beginLoopDirective = std::get(loopConstruct.t); - const auto &loopOpClauseList = + const auto &beginClauseList = std::get(beginLoopDirective.t); mlir::Location currentLocation = converter.genLocation(beginLoopDirective.source); @@ -1928,33 +2211,31 @@ static void genOMP(Fortran::lower::AbstractConverter &converter, bool validDirective = false; if (llvm::omp::topTaskloopSet.test(ompDirective)) { validDirective = true; - TODO(currentLocation, "Taskloop construct"); + genTaskloopOp(converter, semaCtx, eval, currentLocation, beginClauseList); } else { // Create omp.{target, teams, distribute, parallel} nested operations if ((llvm::omp::allTargetSet & llvm::omp::loopConstructSet) .test(ompDirective)) { validDirective = true; genTargetOp(converter, semaCtx, eval, /*genNested=*/false, - currentLocation, loopOpClauseList, ompDirective, - /*outerCombined=*/true); + currentLocation, beginClauseList, /*outerCombined=*/true); } if ((llvm::omp::allTeamsSet & llvm::omp::loopConstructSet) .test(ompDirective)) { validDirective = true; genTeamsOp(converter, semaCtx, eval, /*genNested=*/false, currentLocation, - loopOpClauseList, - /*outerCombined=*/true); + beginClauseList, /*outerCombined=*/true); } if (llvm::omp::allDistributeSet.test(ompDirective)) { validDirective = true; - TODO(currentLocation, "Distribute construct"); + genDistributeOp(converter, semaCtx, eval, /*genNested=*/false, + currentLocation, beginClauseList); } if ((llvm::omp::allParallelSet & llvm::omp::loopConstructSet) .test(ompDirective)) { validDirective = true; genParallelOp(converter, symTable, semaCtx, eval, /*genNested=*/false, - currentLocation, loopOpClauseList, - /*outerCombined=*/true); + currentLocation, beginClauseList, /*outerCombined=*/true); } } if ((llvm::omp::allDoSet | llvm::omp::allSimdSet).test(ompDirective)) @@ -1968,16 +2249,14 @@ static void genOMP(Fortran::lower::AbstractConverter &converter, if (llvm::omp::allDoSimdSet.test(ompDirective)) { // 2.9.3.2 Workshare SIMD construct - createSimdWsloop(converter, semaCtx, eval, ompDirective, loopOpClauseList, - endClauseList, currentLocation); - + genCompositeDoSimd(converter, semaCtx, eval, ompDirective, beginClauseList, + endClauseList, currentLocation); } else if (llvm::omp::allSimdSet.test(ompDirective)) { // 2.9.3.1 SIMD construct - createSimdLoop(converter, semaCtx, eval, ompDirective, loopOpClauseList, - currentLocation); + genSimdLoopOp(converter, semaCtx, eval, currentLocation, beginClauseList); } else { - createWsloop(converter, semaCtx, eval, ompDirective, loopOpClauseList, - endClauseList, currentLocation); + genWsloopOp(converter, semaCtx, eval, currentLocation, beginClauseList, + endClauseList); } } @@ -1997,44 +2276,39 @@ genOMP(Fortran::lower::AbstractConverter &converter, Fortran::semantics::SemanticsContext &semaCtx, Fortran::lower::pft::Evaluation &eval, const Fortran::parser::OpenMPSectionsConstruct §ionsConstruct) { - mlir::Location currentLocation = converter.getCurrentLocation(); - mlir::omp::SectionsClauseOps clauseOps; const auto &beginSectionsDirective = std::get(sectionsConstruct.t); - const auto §ionsClauseList = + const auto &beginClauseList = std::get(beginSectionsDirective.t); // Process clauses before optional omp.parallel, so that new variables are // allocated outside of the parallel region - ClauseProcessor cp(converter, semaCtx, sectionsClauseList); - cp.processSectionsReduction(currentLocation, clauseOps); - cp.processAllocate(clauseOps); - // TODO Support delayed privatization. + mlir::Location currentLocation = converter.getCurrentLocation(); + mlir::omp::SectionsClauseOps clauseOps; + genSectionsClauses(converter, semaCtx, beginClauseList, currentLocation, + /*clausesFromBeginSections=*/true, clauseOps); + // Parallel wrapper of PARALLEL SECTIONS construct llvm::omp::Directive dir = std::get(beginSectionsDirective.t) .v; - - // Parallel wrapper of PARALLEL SECTIONS construct if (dir == llvm::omp::Directive::OMPD_parallel_sections) { genParallelOp(converter, symTable, semaCtx, eval, - /*genNested=*/false, currentLocation, sectionsClauseList, + /*genNested=*/false, currentLocation, beginClauseList, /*outerCombined=*/true); } else { const auto &endSectionsDirective = std::get(sectionsConstruct.t); - const auto &endSectionsClauseList = + const auto &endClauseList = std::get(endSectionsDirective.t); - ClauseProcessor(converter, semaCtx, endSectionsClauseList) - .processNowait(clauseOps); + genSectionsClauses(converter, semaCtx, endClauseList, currentLocation, + /*clausesFromBeginSections=*/false, clauseOps); } - // SECTIONS construct - genOpWithBody( - OpWithBodyGenInfo(converter, semaCtx, currentLocation, eval) - .setGenNested(false), - clauseOps); + // SECTIONS construct. + genSectionsOp(converter, semaCtx, eval, currentLocation, clauseOps); + // Generate nested SECTION operations recursively. const auto §ionBlocks = std::get(sectionsConstruct.t); auto &firOpBuilder = converter.getFirOpBuilder(); @@ -2043,40 +2317,12 @@ genOMP(Fortran::lower::AbstractConverter &converter, llvm::zip(sectionBlocks.v, eval.getNestedEvaluations())) { symTable.pushScope(); genSectionOp(converter, semaCtx, neval, /*genNested=*/true, currentLocation, - sectionsClauseList); + beginClauseList); symTable.popScope(); firOpBuilder.restoreInsertionPoint(ip); } } -static void -genOMP(Fortran::lower::AbstractConverter &converter, - Fortran::lower::SymMap &symTable, - Fortran::semantics::SemanticsContext &semaCtx, - Fortran::lower::pft::Evaluation &eval, - const Fortran::parser::OpenMPStandaloneConstruct &standaloneConstruct) { - std::visit( - Fortran::common::visitors{ - [&](const Fortran::parser::OpenMPSimpleStandaloneConstruct - &simpleStandaloneConstruct) { - genOmpSimpleStandalone(converter, semaCtx, eval, - /*genNested=*/true, - simpleStandaloneConstruct); - }, - [&](const Fortran::parser::OpenMPFlushConstruct &flushConstruct) { - genOmpFlush(converter, semaCtx, eval, flushConstruct); - }, - [&](const Fortran::parser::OpenMPCancelConstruct &cancelConstruct) { - TODO(converter.getCurrentLocation(), "OpenMPCancelConstruct"); - }, - [&](const Fortran::parser::OpenMPCancellationPointConstruct - &cancellationPointConstruct) { - TODO(converter.getCurrentLocation(), "OpenMPCancelConstruct"); - }, - }, - standaloneConstruct.u); -} - static void genOMP(Fortran::lower::AbstractConverter &converter, Fortran::lower::SymMap &symTable, Fortran::semantics::SemanticsContext &semaCtx, diff --git a/flang/test/Lower/OpenMP/FIR/target.f90 b/flang/test/Lower/OpenMP/FIR/target.f90 index 022327f9c25da..ca3162340d784 100644 --- a/flang/test/Lower/OpenMP/FIR/target.f90 +++ b/flang/test/Lower/OpenMP/FIR/target.f90 @@ -411,8 +411,8 @@ end subroutine omp_target_implicit_bounds !CHECK-LABEL: func.func @_QPomp_target_thread_limit() { subroutine omp_target_thread_limit integer :: a - !CHECK: %[[VAL_1:.*]] = arith.constant 64 : i32 !CHECK: %[[MAP:.*]] = omp.map.info var_ptr({{.*}}) map_clauses(tofrom) capture(ByRef) -> !fir.ref {name = "a"} + !CHECK: %[[VAL_1:.*]] = arith.constant 64 : i32 !CHECK: omp.target thread_limit(%[[VAL_1]] : i32) map_entries(%[[MAP]] -> %[[ARG_0:.*]] : !fir.ref) { !CHECK: ^bb0(%[[ARG_0]]: !fir.ref): !$omp target map(tofrom: a) thread_limit(64) diff --git a/flang/test/Lower/OpenMP/target.f90 b/flang/test/Lower/OpenMP/target.f90 index 6f72b5a34d069..51b66327dfb24 100644 --- a/flang/test/Lower/OpenMP/target.f90 +++ b/flang/test/Lower/OpenMP/target.f90 @@ -490,8 +490,8 @@ end subroutine omp_target_implicit_bounds !CHECK-LABEL: func.func @_QPomp_target_thread_limit() { subroutine omp_target_thread_limit integer :: a - !CHECK: %[[VAL_1:.*]] = arith.constant 64 : i32 !CHECK: %[[MAP:.*]] = omp.map.info var_ptr({{.*}}) map_clauses(tofrom) capture(ByRef) -> !fir.ref {name = "a"} + !CHECK: %[[VAL_1:.*]] = arith.constant 64 : i32 !CHECK: omp.target thread_limit(%[[VAL_1]] : i32) map_entries(%[[MAP]] -> %{{.*}} : !fir.ref) { !CHECK: ^bb0(%{{.*}}: !fir.ref): !$omp target map(tofrom: a) thread_limit(64) diff --git a/flang/test/Lower/OpenMP/use-device-ptr-to-use-device-addr.f90 b/flang/test/Lower/OpenMP/use-device-ptr-to-use-device-addr.f90 index 33b5971656010..d849dd206b943 100644 --- a/flang/test/Lower/OpenMP/use-device-ptr-to-use-device-addr.f90 +++ b/flang/test/Lower/OpenMP/use-device-ptr-to-use-device-addr.f90 @@ -21,7 +21,7 @@ subroutine only_use_device_ptr !CHECK: func.func @{{.*}}mix_use_device_ptr_and_addr() !CHECK: omp.target_data use_device_ptr({{.*}} : !fir.ref>) use_device_addr(%{{.*}}, %{{.*}} : !fir.ref>>>, !fir.ref>>>) { -!CHECK: ^bb0(%{{.*}}: !fir.ref>, %{{.*}}: !fir.ref>>>, %{{.*}}: !fir.ref>>>): +!CHECK: ^bb0(%{{.*}}: !fir.ref>>>, %{{.*}}: !fir.ref>, %{{.*}}: !fir.ref>>>): subroutine mix_use_device_ptr_and_addr use iso_c_binding integer, pointer, dimension(:) :: array @@ -47,7 +47,7 @@ subroutine only_use_device_addr !CHECK: func.func @{{.*}}mix_use_device_ptr_and_addr_and_map() !CHECK: omp.target_data map_entries(%{{.*}}, %{{.*}} : !fir.ref, !fir.ref) use_device_ptr(%{{.*}} : !fir.ref>) use_device_addr(%{{.*}}, %{{.*}} : !fir.ref>>>, !fir.ref>>>) { -!CHECK: ^bb0(%{{.*}}: !fir.ref>, %{{.*}}: !fir.ref>>>, %{{.*}}: !fir.ref>>>): +!CHECK: ^bb0(%{{.*}}: !fir.ref>>>, %{{.*}}: !fir.ref>, %{{.*}}: !fir.ref>>>): subroutine mix_use_device_ptr_and_addr_and_map use iso_c_binding integer :: i, j