diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp index e62395676a696..44a955ae5c4dd 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.cpp @@ -206,10 +206,6 @@ getIfClauseOperand(lower::AbstractConverter &converter, // ClauseProcessor unique clauses //===----------------------------------------------------------------------===// -bool ClauseProcessor::processBare(mlir::omp::BareClauseOps &result) const { - return markClauseOccurrence(result.bare); -} - bool ClauseProcessor::processBind(mlir::omp::BindClauseOps &result) const { if (auto *clause = findUniqueClause()) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); diff --git a/flang/lib/Lower/OpenMP/ClauseProcessor.h b/flang/lib/Lower/OpenMP/ClauseProcessor.h index da920407b2164..95ac2a767e20d 100644 --- a/flang/lib/Lower/OpenMP/ClauseProcessor.h +++ b/flang/lib/Lower/OpenMP/ClauseProcessor.h @@ -57,7 +57,6 @@ class ClauseProcessor { : converter(converter), semaCtx(semaCtx), clauses(clauses) {} // 'Unique' clauses: They can appear at most once in the clause list. 
- bool processBare(mlir::omp::BareClauseOps &result) const; bool processBind(mlir::omp::BindClauseOps &result) const; bool processCancelDirectiveName( mlir::omp::CancelDirectiveNameClauseOps &result) const; diff --git a/flang/lib/Lower/OpenMP/OpenMP.cpp b/flang/lib/Lower/OpenMP/OpenMP.cpp index 6d93f245228a8..f2609df67eca0 100644 --- a/flang/lib/Lower/OpenMP/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP/OpenMP.cpp @@ -67,23 +67,12 @@ static void genOMPDispatch(lower::AbstractConverter &converter, const ConstructQueue &queue, ConstructQueue::const_iterator item); -static void processHostEvalClauses(lower::AbstractConverter &converter, - semantics::SemanticsContext &semaCtx, - lower::StatementContext &stmtCtx, - lower::pft::Evaluation &eval, - mlir::Location loc); - namespace { /// Structure holding information that is needed to pass host-evaluated /// information to later lowering stages. class HostEvalInfo { public: - // Allow this function access to private members in order to initialize them. - friend void ::processHostEvalClauses(lower::AbstractConverter &, - semantics::SemanticsContext &, - lower::StatementContext &, - lower::pft::Evaluation &, - mlir::Location); + friend class HostEvalPatternProcessor; /// Fill \c vars with values stored in \c ops. /// @@ -201,6 +190,393 @@ class HostEvalInfo { llvm::SmallVector iv; bool loopNestApplied = false, parallelApplied = false; }; + +class OpenMPPatternProcessor { +public: + OpenMPPatternProcessor(semantics::SemanticsContext &semaCtx) + : semaCtx(semaCtx) {} + virtual ~OpenMPPatternProcessor() = default; + + /// Run the pattern from the given evaluation. + void process(lower::pft::Evaluation &eval) { + dirsToProcess = initialDirsToProcess(); + processEval(eval); + } + +protected: + /// Returns the set of directives of interest at the beginning of the pattern. 
+ virtual OmpDirectiveSet initialDirsToProcess() const = 0; + + /// Processes a single directive and, based on it, returns the set of other + /// directives of interest that would be part of the pattern if nested inside + /// of it. + virtual OmpDirectiveSet processDirective(lower::pft::Evaluation &eval, + llvm::omp::Directive dir) = 0; + + /// Obtain the list of clauses of the given OpenMP block or loop construct + /// evaluation. If it's not an OpenMP construct, no modifications are made to + /// the \c clauses output argument. + void extractClauses(lower::pft::Evaluation &eval, List &clauses) { + const auto *ompEval = eval.getIf(); + if (!ompEval) + return; + + const parser::OmpClauseList *beginClauseList = nullptr; + const parser::OmpClauseList *endClauseList = nullptr; + common::visit( + [&](const auto &construct) { + using Type = llvm::remove_cvref_t; + if constexpr (std::is_same_v || + std::is_same_v) { + beginClauseList = &construct.BeginDir().Clauses(); + if (auto &endSpec = construct.EndDir()) + endClauseList = &endSpec->Clauses(); + } + }, + ompEval->u); + + assert(beginClauseList && "expected begin directive"); + clauses.append(makeClauses(*beginClauseList, semaCtx)); + + if (endClauseList) + clauses.append(makeClauses(*endClauseList, semaCtx)); + } + +private: + /// Decide whether an evaluation must be processed as part of the pattern. + /// + /// This is the case whenever it's an OpenMP construct and the associated + /// directive is part of the current set of directives of interest. + bool shouldProcessEval(lower::pft::Evaluation &eval) const { + const auto *ompEval = eval.getIf(); + if (!ompEval) + return false; + + return dirsToProcess.test(parser::omp::GetOmpDirectiveName(*ompEval).v); + } + + /// Processes an evaluation and, potentially, recursively process a single + /// nested evaluation. 
+ /// + /// For a nested evaluation to be recursively processed, it must be an OpenMP + /// construct, have no sibling evaluations and match one of the + /// next-directives of interest set returned by a call to \c processDirective + /// on the parent evaluation. + void processEval(lower::pft::Evaluation &eval) { + if (!shouldProcessEval(eval)) + return; + + const auto &ompEval = eval.get(); + OmpDirectiveSet processNested = + processDirective(eval, parser::omp::GetOmpDirectiveName(ompEval).v); + + if (processNested.empty()) + return; + + if (lower::pft::Evaluation *nestedEval = extractOnlyOmpNestedEval(eval)) { + OmpDirectiveSet prevDirs = dirsToProcess; + dirsToProcess = processNested; + processEval(*nestedEval); + dirsToProcess = prevDirs; + } + } + + /// Return the directive that is immediately nested inside of the given + /// \c parent evaluation, if it is its only non-end-statement nested + /// evaluation and it represents an OpenMP construct. + lower::pft::Evaluation * + extractOnlyOmpNestedEval(lower::pft::Evaluation &parent) { + if (!parent.hasNestedEvaluations()) + return nullptr; + + auto &nested = parent.getFirstNestedEvaluation(); + if (!nested.isA()) + return nullptr; + + for (auto &sibling : parent.getNestedEvaluations()) + if (&sibling != &nested && !sibling.isEndStmt()) + return nullptr; + + return &nested; + } + +protected: + semantics::SemanticsContext &semaCtx; + +private: + OmpDirectiveSet dirsToProcess; +}; + +/// Helper pattern to navigate target SPMD patterns. 
+class TargetSPMDPatternProcessor : public OpenMPPatternProcessor { +public: + using OpenMPPatternProcessor::OpenMPPatternProcessor; + virtual ~TargetSPMDPatternProcessor() = default; + +protected: + virtual OmpDirectiveSet initialDirsToProcess() const override { + return llvm::omp::allTargetSet; + } + + virtual OmpDirectiveSet processDirective(lower::pft::Evaluation &, + llvm::omp::Directive dir) override { + using namespace llvm::omp; + + // The default implementation does nothing, except it returns the allowed + // single nested directives for an SPMD kernel. If called by subclasses, it + // helps navigate SPMD patterns. + // + // Patterns considered SPMD: + // - target teams distribute parallel do [simd] + // - target teams loop + // - target parallel do [simd] + // - target parallel loop + switch (dir) { + case OMPD_target: + return topTeamsSet | topParallelSet; + case OMPD_target_teams: + case OMPD_teams: + return topDistributeSet | topLoopSet; + case OMPD_target_parallel: + case OMPD_parallel: + return topLoopSet | topDoSet; + default: + return {}; + } + } +}; + +/// Populates the given host eval info structure after processing clauses for +/// the given \p eval OpenMP target construct, or nested constructs, if these +/// must be evaluated outside of the target region per the spec. +/// +/// In particular, this will ensure that in 'target teams' and equivalent nested +/// constructs, the \c thread_limit, \c num_teams and \c num_threads clauses +/// will be evaluated in the host. Additionally, loop bounds and steps will also +/// be evaluated in the host if a 'target teams distribute' or target SPMD +/// construct is detected (i.e. 'target teams distribute parallel do [simd]', +/// 'target parallel do [simd]' or equivalent nesting). 
+/// +/// The resulting updated \c HostEvalInfo structure is intended to be used to +/// populate the \c host_eval operands of the associated \c omp.target +/// operation, and also to be checked and used by later lowering steps to +/// populate the corresponding operands of the \c omp.teams, \c omp.parallel or +/// \c omp.loop_nest operations. +class HostEvalPatternProcessor : public TargetSPMDPatternProcessor { +public: + HostEvalPatternProcessor(lower::AbstractConverter &converter, + semantics::SemanticsContext &semaCtx, + lower::StatementContext &stmtCtx, mlir::Location loc, + HostEvalInfo &hostEvalInfo) + : TargetSPMDPatternProcessor(semaCtx), converter(converter), + stmtCtx(stmtCtx), loc(loc), hostEvalInfo(hostEvalInfo) {} + virtual ~HostEvalPatternProcessor() = default; + +protected: + virtual OmpDirectiveSet processDirective(lower::pft::Evaluation &eval, + llvm::omp::Directive dir) override { + using namespace llvm::omp; + + List clauses; + extractClauses(eval, clauses); + ClauseProcessor cp(converter, semaCtx, clauses); + + // Currently, we deal differently with e.g. `target parallel workshare` to + // `target parallel` with a single nested `workshare`. The first case would + // result in no clauses being evaluated in the host, as there's not a case + // for it in the below switch statement. The second case would evaluate + // `num_threads` clauses in the host, because `target parallel` could be + // followed by a `do` construct, which would make this an SPMD target + // region. + // + // TODO: We don't probably want to have such divergent behavior when dealing + // with combined directives. We need to revisit this logic without listing + // every possible combined directive containing a clause we'd otherwise + // evaluate in the host if the directive was split into its leafs. 
+ switch (dir) { + case OMPD_teams_distribute_parallel_do: + case OMPD_teams_distribute_parallel_do_simd: + cp.processThreadLimit(stmtCtx, hostEvalInfo.ops); + [[fallthrough]]; + case OMPD_target_teams_distribute_parallel_do: + case OMPD_target_teams_distribute_parallel_do_simd: + cp.processNumTeams(stmtCtx, hostEvalInfo.ops); + [[fallthrough]]; + case OMPD_distribute_parallel_do: + case OMPD_distribute_parallel_do_simd: + case OMPD_target_parallel_do: + case OMPD_target_parallel_do_simd: + case OMPD_target_parallel_loop: + case OMPD_parallel_do: + case OMPD_parallel_do_simd: + case OMPD_parallel_loop: + cp.processNumThreads(stmtCtx, hostEvalInfo.ops); + [[fallthrough]]; + case OMPD_distribute: + case OMPD_distribute_simd: + case OMPD_do: + case OMPD_do_simd: + cp.processCollapse(loc, eval, hostEvalInfo.ops, hostEvalInfo.ops, + hostEvalInfo.iv); + return {}; + + case OMPD_teams: + cp.processThreadLimit(stmtCtx, hostEvalInfo.ops); + [[fallthrough]]; + case OMPD_target_teams: + cp.processNumTeams(stmtCtx, hostEvalInfo.ops); + break; + + case OMPD_teams_distribute: + case OMPD_teams_distribute_simd: + cp.processThreadLimit(stmtCtx, hostEvalInfo.ops); + [[fallthrough]]; + case OMPD_target_teams_distribute: + case OMPD_target_teams_distribute_simd: + cp.processCollapse(loc, eval, hostEvalInfo.ops, hostEvalInfo.ops, + hostEvalInfo.iv); + cp.processNumTeams(stmtCtx, hostEvalInfo.ops); + return {}; + + case OMPD_teams_loop: + cp.processThreadLimit(stmtCtx, hostEvalInfo.ops); + [[fallthrough]]; + case OMPD_target_teams_loop: + cp.processNumTeams(stmtCtx, hostEvalInfo.ops); + [[fallthrough]]; + case OMPD_loop: + cp.processCollapse(loc, eval, hostEvalInfo.ops, hostEvalInfo.ops, + hostEvalInfo.iv); + return {}; + + case OMPD_teams_workdistribute: + cp.processThreadLimit(stmtCtx, hostEvalInfo.ops); + [[fallthrough]]; + case OMPD_target_teams_workdistribute: + cp.processNumTeams(stmtCtx, hostEvalInfo.ops); + break; + + case OMPD_target_parallel: + case OMPD_parallel: + 
cp.processNumThreads(stmtCtx, hostEvalInfo.ops); + break; + + case OMPD_target: + break; + + default: + return {}; + } + + // Visit nested directives as per the SPMD pattern. + return TargetSPMDPatternProcessor::processDirective(eval, dir); + } + +private: + lower::AbstractConverter &converter; + lower::StatementContext &stmtCtx; + mlir::Location loc; + HostEvalInfo &hostEvalInfo; +}; + +/// Checks target regions and, based on the directives and clauses encountered, +/// determines its associated kernel type. +class KernelTypePatternProcessor : protected TargetSPMDPatternProcessor { +public: + KernelTypePatternProcessor(semantics::SemanticsContext &semaCtx, + mlir::ModuleOp moduleOp) + : TargetSPMDPatternProcessor(semaCtx), moduleOp(moduleOp) {} + virtual ~KernelTypePatternProcessor() = default; + + /// Executes the pattern and returns the kernel type of the given target + /// region, or \c mlir::omp::TargetExecMode::generic by default for non-target + /// evaluations. + mlir::omp::TargetExecMode getKernelType(lower::pft::Evaluation &eval) { + execMode = mlir::omp::TargetExecMode::generic; + process(eval); + return execMode; + } + +protected: + virtual OmpDirectiveSet processDirective(lower::pft::Evaluation &eval, + llvm::omp::Directive dir) override { + using namespace llvm::omp; + + switch (dir) { + case OMPD_target: + case OMPD_target_parallel: + case OMPD_parallel: + case OMPD_teams: + break; + case OMPD_target_teams: + if (hasOmpxBareClause(eval)) { + execMode = mlir::omp::TargetExecMode::bare; + return {}; + } + break; + case OMPD_target_teams_distribute_parallel_do: + case OMPD_target_teams_distribute_parallel_do_simd: + case OMPD_target_teams_loop: + case OMPD_target_parallel_do: + case OMPD_target_parallel_do_simd: + case OMPD_target_parallel_loop: + case OMPD_teams_distribute_parallel_do: + case OMPD_teams_distribute_parallel_do_simd: + case OMPD_teams_loop: + case OMPD_distribute_parallel_do: + case OMPD_distribute_parallel_do_simd: + case OMPD_loop: + 
case OMPD_parallel_do: + case OMPD_parallel_do_simd: + case OMPD_do: + case OMPD_do_simd: + execMode = canPromoteSPMDToNoLoop(eval) + ? mlir::omp::TargetExecMode::spmd_no_loop + : mlir::omp::TargetExecMode::spmd; + return {}; + default: + return {}; + } + + // Visit nested directives as per the SPMD pattern. + return TargetSPMDPatternProcessor::processDirective(eval, dir); + } + +private: + bool canPromoteSPMDToNoLoop(lower::pft::Evaluation &eval) { + List clauses; + extractClauses(eval, clauses); + + // First make sure the proper module attributes are present in order to + // perform this optimization. + auto ompFlags = + llvm::cast(*moduleOp).getFlags(); + if (!ompFlags || !ompFlags.getAssumeTeamsOversubscription() || + !ompFlags.getAssumeThreadsOversubscription()) + return false; + + // The num_teams clause can break no-loop assumptions, and reductions are + // slower in no-loop mode. + return llvm::find_if(clauses, [](const Clause &clause) { + return std::holds_alternative(clause.u) || + std::holds_alternative(clause.u); + }) == clauses.end(); + } + + bool hasOmpxBareClause(lower::pft::Evaluation &eval) { + List clauses; + extractClauses(eval, clauses); + + return llvm::find_if(clauses, [](const Clause &clause) { + return std::holds_alternative(clause.u); + }) != clauses.end(); + } + +private: + mlir::ModuleOp moduleOp; + mlir::omp::TargetExecMode execMode; +}; + } // namespace /// Stack of \see HostEvalInfo to represent the current nest of \c omp.target @@ -384,187 +760,6 @@ extractMappedBaseValues(llvm::ArrayRef vars, }); } -/// Populate the global \see hostEvalInfo after processing clauses for the given -/// \p eval OpenMP target construct, or nested constructs, if these must be -/// evaluated outside of the target region per the spec. -/// -/// In particular, this will ensure that in 'target teams' and equivalent nested -/// constructs, the \c thread_limit and \c num_teams clauses will be evaluated -/// in the host. 
Additionally, loop bounds, steps and the \c num_threads clause -/// will also be evaluated in the host if a target SPMD construct is detected -/// (i.e. 'target teams distribute parallel do [simd]' or equivalent nesting). -/// -/// The result, stored as a global, is intended to be used to populate the \c -/// host_eval operands of the associated \c omp.target operation, and also to be -/// checked and used by later lowering steps to populate the corresponding -/// operands of the \c omp.teams, \c omp.parallel or \c omp.loop_nest -/// operations. -static void processHostEvalClauses(lower::AbstractConverter &converter, - semantics::SemanticsContext &semaCtx, - lower::StatementContext &stmtCtx, - lower::pft::Evaluation &eval, - mlir::Location loc) { - // Obtain the list of clauses of the given OpenMP block or loop construct - // evaluation. Other evaluations passed to this lambda keep `clauses` - // unchanged. - auto extractClauses = [&semaCtx](lower::pft::Evaluation &eval, - List &clauses) { - const auto *ompEval = eval.getIf(); - if (!ompEval) - return; - - const parser::OmpClauseList *beginClauseList = nullptr; - const parser::OmpClauseList *endClauseList = nullptr; - common::visit( - [&](const auto &construct) { - using Type = llvm::remove_cvref_t; - if constexpr (std::is_same_v || - std::is_same_v) { - beginClauseList = &construct.BeginDir().Clauses(); - if (auto &endSpec = construct.EndDir()) - endClauseList = &endSpec->Clauses(); - } - }, - ompEval->u); - - assert(beginClauseList && "expected begin directive"); - clauses.append(makeClauses(*beginClauseList, semaCtx)); - - if (endClauseList) - clauses.append(makeClauses(*endClauseList, semaCtx)); - }; - - // Return the directive that is immediately nested inside of the given - // `parent` evaluation, if it is its only non-end-statement nested evaluation - // and it represents an OpenMP construct. 
- auto extractOnlyOmpNestedDir = [](lower::pft::Evaluation &parent) - -> std::optional { - if (!parent.hasNestedEvaluations()) - return std::nullopt; - - llvm::omp::Directive dir; - auto &nested = parent.getFirstNestedEvaluation(); - if (const auto *ompEval = nested.getIf()) - dir = parser::omp::GetOmpDirectiveName(*ompEval).v; - else - return std::nullopt; - - for (auto &sibling : parent.getNestedEvaluations()) - if (&sibling != &nested && !sibling.isEndStmt()) - return std::nullopt; - - return dir; - }; - - // Process the given evaluation assuming it's part of a 'target' construct or - // captured by one, and store results in the global `hostEvalInfo`. - std::function &)> - processEval; - processEval = [&](lower::pft::Evaluation &eval, const List &clauses) { - using namespace llvm::omp; - ClauseProcessor cp(converter, semaCtx, clauses); - - // Call `processEval` recursively with the immediately nested evaluation and - // its corresponding clauses if there is a single nested evaluation - // representing an OpenMP directive that passes the given test. 
- auto processSingleNestedIf = [&](llvm::function_ref test) { - std::optional nestedDir = extractOnlyOmpNestedDir(eval); - if (!nestedDir || !test(*nestedDir)) - return; - - lower::pft::Evaluation &nestedEval = eval.getFirstNestedEvaluation(); - List nestedClauses; - extractClauses(nestedEval, nestedClauses); - processEval(nestedEval, nestedClauses); - }; - - const auto *ompEval = eval.getIf(); - if (!ompEval) - return; - - HostEvalInfo *hostInfo = getHostEvalInfoStackTop(converter); - assert(hostInfo && "expected HOST_EVAL info structure"); - - switch (parser::omp::GetOmpDirectiveName(*ompEval).v) { - case OMPD_teams_distribute_parallel_do: - case OMPD_teams_distribute_parallel_do_simd: - cp.processThreadLimit(stmtCtx, hostInfo->ops); - [[fallthrough]]; - case OMPD_target_teams_distribute_parallel_do: - case OMPD_target_teams_distribute_parallel_do_simd: - cp.processNumTeams(stmtCtx, hostInfo->ops); - [[fallthrough]]; - case OMPD_distribute_parallel_do: - case OMPD_distribute_parallel_do_simd: - cp.processNumThreads(stmtCtx, hostInfo->ops); - [[fallthrough]]; - case OMPD_distribute: - case OMPD_distribute_simd: - cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->ops, hostInfo->iv); - break; - - case OMPD_teams: - cp.processThreadLimit(stmtCtx, hostInfo->ops); - [[fallthrough]]; - case OMPD_target_teams: - cp.processNumTeams(stmtCtx, hostInfo->ops); - processSingleNestedIf([](Directive nestedDir) { - return topDistributeSet.test(nestedDir) || topLoopSet.test(nestedDir); - }); - break; - - case OMPD_teams_distribute: - case OMPD_teams_distribute_simd: - cp.processThreadLimit(stmtCtx, hostInfo->ops); - [[fallthrough]]; - case OMPD_target_teams_distribute: - case OMPD_target_teams_distribute_simd: - cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->ops, hostInfo->iv); - cp.processNumTeams(stmtCtx, hostInfo->ops); - break; - - case OMPD_teams_loop: - cp.processThreadLimit(stmtCtx, hostInfo->ops); - [[fallthrough]]; - case OMPD_target_teams_loop: - 
cp.processNumTeams(stmtCtx, hostInfo->ops); - [[fallthrough]]; - case OMPD_loop: - cp.processCollapse(loc, eval, hostInfo->ops, hostInfo->ops, hostInfo->iv); - break; - - case OMPD_teams_workdistribute: - cp.processThreadLimit(stmtCtx, hostInfo->ops); - [[fallthrough]]; - case OMPD_target_teams_workdistribute: - cp.processNumTeams(stmtCtx, hostInfo->ops); - break; - - // Standalone 'target' case. - case OMPD_target: { - processSingleNestedIf( - [](Directive nestedDir) { return topTeamsSet.test(nestedDir); }); - break; - } - default: - break; - } - }; - - const auto *ompEval = eval.getIf(); - assert(ompEval && - llvm::omp::allTargetSet.test( - parser::omp::GetOmpDirectiveName(*ompEval).v) && - "expected TARGET construct evaluation"); - (void)ompEval; - - // Use the whole list of clauses passed to the construct here, rather than the - // ones only applied to omp.target. - List clauses; - extractClauses(eval, clauses); - processEval(eval, clauses); -} - static lower::pft::Evaluation * getCollapsedLoopEval(lower::pft::Evaluation &eval, int collapseValue) { // Return the Evaluation of the innermost collapsed loop, or the current one @@ -1717,20 +1912,21 @@ static void genTargetClauses( lower::AbstractConverter &converter, semantics::SemanticsContext &semaCtx, lower::SymMap &symTable, lower::StatementContext &stmtCtx, lower::pft::Evaluation &eval, const List &clauses, - mlir::Location loc, mlir::omp::TargetOperands &clauseOps, + mlir::Location loc, mlir::omp::TargetExtOperands &clauseOps, DefaultMapsTy &defaultMaps, llvm::SmallVectorImpl &hasDeviceAddrSyms, llvm::SmallVectorImpl &isDevicePtrSyms, llvm::SmallVectorImpl &mapSyms) { ClauseProcessor cp(converter, semaCtx, clauses); - cp.processBare(clauseOps); cp.processDefaultMap(stmtCtx, defaultMaps); cp.processDepend(symTable, stmtCtx, clauseOps); cp.processDevice(stmtCtx, clauseOps); cp.processHasDeviceAddr(stmtCtx, clauseOps, hasDeviceAddrSyms); if (HostEvalInfo *hostEvalInfo = getHostEvalInfoStackTop(converter)) { // 
Only process host_eval if compiling for the host device. - processHostEvalClauses(converter, semaCtx, stmtCtx, eval, loc); + HostEvalPatternProcessor processor(converter, semaCtx, stmtCtx, loc, + *hostEvalInfo); + processor.process(eval); hostEvalInfo->collectValues(clauseOps.hostEvalVars); } cp.processIf(llvm::omp::Directive::OMPD_target, clauseOps); @@ -2710,7 +2906,7 @@ genTargetOp(lower::AbstractConverter &converter, lower::SymMap &symTable, if (!isTargetDevice) converter.getStateStack().stackPush(); - mlir::omp::TargetOperands clauseOps; + mlir::omp::TargetExtOperands clauseOps; DefaultMapsTy defaultMaps; llvm::SmallVector mapSyms, isDevicePtrSyms, hasDeviceAddrSyms; @@ -2718,6 +2914,10 @@ genTargetOp(lower::AbstractConverter &converter, lower::SymMap &symTable, loc, clauseOps, defaultMaps, hasDeviceAddrSyms, isDevicePtrSyms, mapSyms); + KernelTypePatternProcessor processor(semaCtx, converter.getModuleOp()); + clauseOps.kernelType = mlir::omp::TargetExecModeAttr::get( + &converter.getMLIRContext(), processor.getKernelType(eval)); + if (!isDevicePtrSyms.empty()) { // is_device_ptr maps get duplicated so the clause and synthesized // has_device_addr entry each own a unique MapInfoOp user, keeping diff --git a/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp b/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp index ff346e79276ce..ea8f4d7161534 100644 --- a/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp +++ b/flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp @@ -302,7 +302,7 @@ class DoConcurrentConversion llvm::cast(*module) .getIsTargetDevice(); - mlir::omp::TargetOperands targetClauseOps; + mlir::omp::TargetExtOperands targetClauseOps; genLoopNestClauseOps(doLoop.getLoc(), rewriter, loop, loopNestClauseOps, isTargetDevice ? 
nullptr : &targetClauseOps); @@ -318,6 +318,8 @@ class DoConcurrentConversion {liveIn, TargetDeclareShapeCreationInfo(liveIn)}); } + targetClauseOps.kernelType = mlir::omp::TargetExecModeAttr::get( + rewriter.getContext(), mlir::omp::TargetExecMode::spmd); targetOp = genTargetOp(doLoop.getLoc(), rewriter, mapper, loopNestLiveIns, targetClauseOps, loopNestClauseOps, liveInShapeInfoMap); @@ -456,7 +458,7 @@ class DoConcurrentConversion mlir::Location loc, mlir::ConversionPatternRewriter &rewriter, fir::DoConcurrentLoopOp loop, mlir::omp::LoopNestOperands &loopNestClauseOps, - mlir::omp::TargetOperands *targetClauseOps = nullptr) const { + mlir::omp::TargetExtOperands *targetClauseOps = nullptr) const { assert(loopNestClauseOps.loopLowerBounds.empty() && "Loop nest bounds were already emitted!"); @@ -594,7 +596,7 @@ class DoConcurrentConversion mlir::omp::TargetOp genTargetOp(mlir::Location loc, mlir::ConversionPatternRewriter &rewriter, mlir::IRMapping &mapper, llvm::ArrayRef mappedVars, - mlir::omp::TargetOperands &clauseOps, + mlir::omp::TargetExtOperands &clauseOps, mlir::omp::LoopNestOperands &loopNestClauseOps, const LiveInShapeInfoMap &liveInShapeInfoMap) const { auto targetOp = mlir::omp::TargetOp::create(rewriter, loc, clauseOps); diff --git a/flang/lib/Optimizer/OpenMP/LowerWorkdistribute.cpp b/flang/lib/Optimizer/OpenMP/LowerWorkdistribute.cpp index 2c7980064500f..105be54c4c79f 100644 --- a/flang/lib/Optimizer/OpenMP/LowerWorkdistribute.cpp +++ b/flang/lib/Optimizer/OpenMP/LowerWorkdistribute.cpp @@ -758,15 +758,15 @@ FailureOr splitTargetData(omp::TargetOp targetOp, // Create the inner target op auto newTargetOp = omp::TargetOp::create( rewriter, targetOp.getLoc(), targetOp.getAllocateVars(), - targetOp.getAllocatorVars(), targetOp.getBareAttr(), - targetOp.getDependKindsAttr(), targetOp.getDependVars(), - targetOp.getDevice(), targetOp.getHasDeviceAddrVars(), - targetOp.getHostEvalVars(), targetOp.getIfExpr(), - targetOp.getInReductionVars(), 
targetOp.getInReductionByrefAttr(), - targetOp.getInReductionSymsAttr(), targetOp.getIsDevicePtrVars(), - innerMapInfos, targetOp.getNowaitAttr(), targetOp.getPrivateVars(), - targetOp.getPrivateSymsAttr(), targetOp.getPrivateNeedsBarrierAttr(), - targetOp.getThreadLimitVars(), targetOp.getPrivateMapsAttr()); + targetOp.getAllocatorVars(), targetOp.getDependKindsAttr(), + targetOp.getDependVars(), targetOp.getDevice(), + targetOp.getHasDeviceAddrVars(), targetOp.getHostEvalVars(), + targetOp.getIfExpr(), targetOp.getInReductionVars(), + targetOp.getInReductionByrefAttr(), targetOp.getInReductionSymsAttr(), + targetOp.getIsDevicePtrVars(), innerMapInfos, targetOp.getNowaitAttr(), + targetOp.getPrivateVars(), targetOp.getPrivateSymsAttr(), + targetOp.getPrivateNeedsBarrierAttr(), targetOp.getThreadLimitVars(), + targetOp.getPrivateMapsAttr(), targetOp.getKernelTypeAttr()); rewriter.inlineRegionBefore(targetOp.getRegion(), newTargetOp.getRegion(), newTargetOp.getRegion().begin()); rewriter.replaceOp(targetOp, targetDataOp); @@ -1478,15 +1478,15 @@ genPreTargetOp(omp::TargetOp targetOp, SmallVector &preMapOperands, // update the hostEvalVars of preTargetOp omp::TargetOp preTargetOp = omp::TargetOp::create( rewriter, targetOp.getLoc(), targetOp.getAllocateVars(), - targetOp.getAllocatorVars(), targetOp.getBareAttr(), - targetOp.getDependKindsAttr(), targetOp.getDependVars(), - targetOp.getDevice(), targetOp.getHasDeviceAddrVars(), preHostEvalVars, - targetOp.getIfExpr(), targetOp.getInReductionVars(), - targetOp.getInReductionByrefAttr(), targetOp.getInReductionSymsAttr(), - targetOp.getIsDevicePtrVars(), preMapOperands, targetOp.getNowaitAttr(), - targetOp.getPrivateVars(), targetOp.getPrivateSymsAttr(), - targetOp.getPrivateNeedsBarrierAttr(), targetOp.getThreadLimitVars(), - targetOp.getPrivateMapsAttr()); + targetOp.getAllocatorVars(), targetOp.getDependKindsAttr(), + targetOp.getDependVars(), targetOp.getDevice(), + targetOp.getHasDeviceAddrVars(), preHostEvalVars, 
targetOp.getIfExpr(), + targetOp.getInReductionVars(), targetOp.getInReductionByrefAttr(), + targetOp.getInReductionSymsAttr(), targetOp.getIsDevicePtrVars(), + preMapOperands, targetOp.getNowaitAttr(), targetOp.getPrivateVars(), + targetOp.getPrivateSymsAttr(), targetOp.getPrivateNeedsBarrierAttr(), + targetOp.getThreadLimitVars(), targetOp.getPrivateMapsAttr(), + targetOp.getKernelTypeAttr()); auto *preTargetBlock = rewriter.createBlock( &preTargetOp.getRegion(), preTargetOp.getRegion().begin(), {}, {}); IRMapping preMapping; @@ -1568,15 +1568,17 @@ genIsolatedTargetOp(omp::TargetOp targetOp, SmallVector &postMapOperands, // Create the isolated target op omp::TargetOp isolatedTargetOp = omp::TargetOp::create( rewriter, targetOp.getLoc(), targetOp.getAllocateVars(), - targetOp.getAllocatorVars(), targetOp.getBareAttr(), - targetOp.getDependKindsAttr(), targetOp.getDependVars(), - targetOp.getDevice(), targetOp.getHasDeviceAddrVars(), - isolatedHostEvalVars, targetOp.getIfExpr(), targetOp.getInReductionVars(), + targetOp.getAllocatorVars(), targetOp.getDependKindsAttr(), + targetOp.getDependVars(), targetOp.getDevice(), + targetOp.getHasDeviceAddrVars(), isolatedHostEvalVars, + targetOp.getIfExpr(), targetOp.getInReductionVars(), targetOp.getInReductionByrefAttr(), targetOp.getInReductionSymsAttr(), targetOp.getIsDevicePtrVars(), postMapOperands, targetOp.getNowaitAttr(), targetOp.getPrivateVars(), targetOp.getPrivateSymsAttr(), targetOp.getPrivateNeedsBarrierAttr(), targetOp.getThreadLimitVars(), - targetOp.getPrivateMapsAttr()); + targetOp.getPrivateMapsAttr(), + omp::TargetExecModeAttr::get(targetOp->getContext(), + omp::TargetExecMode::spmd)); auto *isolatedTargetBlock = rewriter.createBlock(&isolatedTargetOp.getRegion(), isolatedTargetOp.getRegion().begin(), {}, {}); @@ -1648,15 +1650,15 @@ static omp::TargetOp genPostTargetOp(omp::TargetOp targetOp, // Create the post target op omp::TargetOp postTargetOp = omp::TargetOp::create( rewriter, targetOp.getLoc(), 
targetOp.getAllocateVars(), - targetOp.getAllocatorVars(), targetOp.getBareAttr(), - targetOp.getDependKindsAttr(), targetOp.getDependVars(), - targetOp.getDevice(), targetOp.getHasDeviceAddrVars(), postHostEvalVars, - targetOp.getIfExpr(), targetOp.getInReductionVars(), - targetOp.getInReductionByrefAttr(), targetOp.getInReductionSymsAttr(), - targetOp.getIsDevicePtrVars(), postMapOperands, targetOp.getNowaitAttr(), - targetOp.getPrivateVars(), targetOp.getPrivateSymsAttr(), - targetOp.getPrivateNeedsBarrierAttr(), targetOp.getThreadLimitVars(), - targetOp.getPrivateMapsAttr()); + targetOp.getAllocatorVars(), targetOp.getDependKindsAttr(), + targetOp.getDependVars(), targetOp.getDevice(), + targetOp.getHasDeviceAddrVars(), postHostEvalVars, targetOp.getIfExpr(), + targetOp.getInReductionVars(), targetOp.getInReductionByrefAttr(), + targetOp.getInReductionSymsAttr(), targetOp.getIsDevicePtrVars(), + postMapOperands, targetOp.getNowaitAttr(), targetOp.getPrivateVars(), + targetOp.getPrivateSymsAttr(), targetOp.getPrivateNeedsBarrierAttr(), + targetOp.getThreadLimitVars(), targetOp.getPrivateMapsAttr(), + targetOp.getKernelTypeAttr()); // Create the block for postTargetOp auto *postTargetBlock = rewriter.createBlock( &postTargetOp.getRegion(), postTargetOp.getRegion().begin(), {}, {}); diff --git a/flang/test/Analysis/AliasAnalysis/alias-analysis-omp-target-1.fir b/flang/test/Analysis/AliasAnalysis/alias-analysis-omp-target-1.fir index f2dd66fd942aa..4363dc5476132 100644 --- a/flang/test/Analysis/AliasAnalysis/alias-analysis-omp-target-1.fir +++ b/flang/test/Analysis/AliasAnalysis/alias-analysis-omp-target-1.fir @@ -45,7 +45,7 @@ func.func @_QPTestAllocatableArray() { %19 = fir.box_offset %3#1 base_addr : (!fir.ref>>>) -> !fir.llvm_ptr>> %20 = omp.map.info var_ptr(%3#1 : !fir.ref>>>, !fir.array) map_clauses(implicit, tofrom) capture(ByRef) var_ptr_ptr(%19 : !fir.llvm_ptr>>) bounds(%18) -> !fir.llvm_ptr>> {name = ""} %21 = omp.map.info var_ptr(%3#1 : !fir.ref>>>, 
!fir.box>>) map_clauses(implicit, tofrom) capture(ByRef) members(%20 : [0] : !fir.llvm_ptr>>) -> !fir.ref>>> {name = "b"} - omp.target map_entries(%11 -> %arg0, %12 -> %arg1, %20 -> %arg2, %21 -> %arg3 : !fir.llvm_ptr>>, !fir.ref>>>, !fir.llvm_ptr>>, !fir.ref>>>) { + omp.target kernel_type(generic) map_entries(%11 -> %arg0, %12 -> %arg1, %20 -> %arg2, %21 -> %arg3 : !fir.llvm_ptr>>, !fir.ref>>>, !fir.llvm_ptr>>, !fir.ref>>>) { %22:2 = hlfir.declare %arg1 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFEa"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) %23:2 = hlfir.declare %arg3 {fortran_attrs = #fir.var_attrs, uniq_name = "_QFEb"} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) %24 = fir.load %23#0 : !fir.ref>>> diff --git a/flang/test/Analysis/AliasAnalysis/alias-analysis-omp-target-2.fir b/flang/test/Analysis/AliasAnalysis/alias-analysis-omp-target-2.fir index c6b2e29a7188a..e1974a07b977e 100644 --- a/flang/test/Analysis/AliasAnalysis/alias-analysis-omp-target-2.fir +++ b/flang/test/Analysis/AliasAnalysis/alias-analysis-omp-target-2.fir @@ -49,7 +49,7 @@ func.func @_QPTestTargetData(%arg0: !fir.ref> {fir.bindc_name %13 = arith.subi %c10_1, %c1_4 : index %14 = omp.map.bounds lower_bound(%c0_5 : index) upper_bound(%13 : index) extent(%c10_1 : index) stride(%c1_4 : index) start_idx(%c1_4 : index) %15 = omp.map.info var_ptr(%6#1 : !fir.ref>, !fir.array<10xf32>) map_clauses(implicit, tofrom) capture(ByRef) bounds(%14) -> !fir.ref> {name = "p"} - omp.target map_entries(%12 -> %arg3, %15 -> %arg4 : !fir.ref>, !fir.ref>) { + omp.target kernel_type(generic) map_entries(%12 -> %arg3, %15 -> %arg4 : !fir.ref>, !fir.ref>) { %c10_10 = arith.constant 10 : index %22 = fir.shape %c10_10 : (index) -> !fir.shape<1> %23:2 = hlfir.declare %arg3(%22) {uniq_name = "_QFtest_target_dataEa"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) @@ -74,7 +74,7 @@ func.func @_QPTestTargetData(%arg0: !fir.ref> {fir.bindc_name %19 = arith.subi %c10_1, %c1_8 : index %20 = omp.map.bounds 
lower_bound(%c0_9 : index) upper_bound(%19 : index) extent(%c10_1 : index) stride(%c1_8 : index) start_idx(%c1_8 : index) %21 = omp.map.info var_ptr(%6#1 : !fir.ref>, !fir.array<10xf32>) map_clauses(implicit, tofrom) capture(ByRef) bounds(%20) -> !fir.ref> {name = "p"} - omp.target map_entries(%18 -> %arg3, %21 -> %arg4 : !fir.ref>, !fir.ref>) { + omp.target kernel_type(generic) map_entries(%18 -> %arg3, %21 -> %arg4 : !fir.ref>, !fir.ref>) { %c10_10 = arith.constant 10 : index %22 = fir.shape %c10_10 : (index) -> !fir.shape<1> %23:2 = hlfir.declare %arg3(%22) {uniq_name = "_QFtest_target_dataEb"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) diff --git a/flang/test/Fir/OpenMP/bounds-generation-for-char-arrays.f90 b/flang/test/Fir/OpenMP/bounds-generation-for-char-arrays.f90 index d9d54ee72edb8..d2e11189c1e24 100644 --- a/flang/test/Fir/OpenMP/bounds-generation-for-char-arrays.f90 +++ b/flang/test/Fir/OpenMP/bounds-generation-for-char-arrays.f90 @@ -9,7 +9,7 @@ module attributes {omp.is_target_device = false} { %0 = omp.map.bounds lower_bound(%c0 : index) upper_bound(%c9 : index) extent(%c10 : index) stride(%c1 : index) start_idx(%c1 : index) %1 = omp.map.bounds lower_bound(%c0 : index) upper_bound(%c9 : index) extent(%c10 : index) stride(%c1 : index) start_idx(%c1 : index) %2 = omp.map.info var_ptr(%arg0 : !fir.ref>>, !fir.array<10x10x!fir.char<1,16>>) map_clauses(tofrom) capture(ByRef) bounds(%0, %1) -> !fir.ref>> {name = ""} - omp.target map_entries(%2 -> %arg1 : !fir.ref>>) { + omp.target kernel_type(generic) map_entries(%2 -> %arg1 : !fir.ref>>) { omp.terminator } return @@ -29,7 +29,7 @@ module attributes {omp.is_target_device = false} { // CHECK: %[[VAL_9:.*]] = llvm.mlir.constant(1 : i64) : i64 // CHECK: %[[VAL_10:.*]] = omp.map.bounds lower_bound(%[[VAL_6]] : i64) upper_bound(%[[VAL_7]] : i64) extent(%[[VAL_7]] : i64) stride(%[[VAL_8]] : i64) start_idx(%[[VAL_9]] : i64) // CHECK: %[[VAL_11:.*]] = omp.map.info var_ptr(%[[ARG0]] : !llvm.ptr, i8) 
map_clauses(tofrom) capture(ByRef) bounds(%[[VAL_4]], %[[VAL_5]], %[[VAL_10]]) -> !llvm.ptr {name = ""} -// CHECK: omp.target map_entries(%[[VAL_11]] -> %[[VAL_12:.*]] : !llvm.ptr) { +// CHECK: omp.target kernel_type(generic) map_entries(%[[VAL_11]] -> %[[VAL_12:.*]] : !llvm.ptr) { // CHECK: omp.terminator // CHECK: } // CHECK: llvm.return @@ -49,7 +49,7 @@ module attributes {omp.is_target_device = false} { %8 = fir.box_offset %arg0 base_addr : (!fir.ref>>>>) -> !fir.llvm_ptr>>> %9 = omp.map.info var_ptr(%arg0 : !fir.ref>>>>, !fir.char<1,16>) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%8 : !fir.llvm_ptr>>>) bounds(%3, %7) -> !fir.llvm_ptr>>> {name = ""} %10 = omp.map.info var_ptr(%arg0 : !fir.ref>>>>, !fir.box>>>) map_clauses(to) capture(ByRef) members(%9 : [0] : !fir.llvm_ptr>>>) -> !fir.ref>>>> {name = "csv_chem_list_a"} - omp.target map_entries(%10 -> %arg1, %9 -> %arg2 : !fir.ref>>>>, !fir.llvm_ptr>>>) { + omp.target kernel_type(generic) map_entries(%10 -> %arg1, %9 -> %arg2 : !fir.ref>>>>, !fir.llvm_ptr>>>) { omp.terminator } return @@ -88,7 +88,7 @@ module attributes {omp.is_target_device = false} { // CHECK: %[[VAL_27:.*]] = omp.map.bounds lower_bound(%[[VAL_23]] : i64) upper_bound(%[[VAL_24]] : i64) extent(%[[VAL_24]] : i64) stride(%[[VAL_25]] : i64) start_idx(%[[VAL_26]] : i64) // CHECK: %[[VAL_28:.*]] = omp.map.info var_ptr(%[[ARG0]] : !llvm.ptr, i8) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%[[VAL_22]] : !llvm.ptr) bounds(%[[VAL_12]], %[[VAL_21]], %[[VAL_27]]) -> !llvm.ptr {name = ""} // CHECK: %[[VAL_29:.*]] = omp.map.info var_ptr(%[[ARG0]] : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<2 x array<3 x i64>>)>) map_clauses(to) capture(ByRef) members(%[[VAL_28]] : [0] : !llvm.ptr) -> !llvm.ptr {name = "csv_chem_list_a"} -// CHECK: omp.target map_entries(%[[VAL_29]] -> %[[VAL_30:.*]], %[[VAL_28]] -> %[[VAL_31:.*]] : !llvm.ptr, !llvm.ptr) { +// CHECK: omp.target kernel_type(generic) map_entries(%[[VAL_29]] -> %[[VAL_30:.*]], 
%[[VAL_28]] -> %[[VAL_31:.*]] : !llvm.ptr, !llvm.ptr) { // CHECK: omp.terminator // CHECK: } // CHECK: llvm.return diff --git a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir index 30ed2f0f2f760..19e618aaa3c04 100644 --- a/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir +++ b/flang/test/Fir/convert-to-llvm-openmp-and-fir.fir @@ -443,7 +443,7 @@ func.func @_QPomp_target() { %1 = arith.subi %c512, %c1 : index %2 = omp.map.bounds lower_bound(%c0 : index) upper_bound(%1 : index) extent(%c512 : index) stride(%c1 : index) start_idx(%c1 : index) %3 = omp.map.info var_ptr(%0 : !fir.ref>, !fir.array<512xi32>) map_clauses(tofrom) capture(ByRef) bounds(%2) -> !fir.ref> {name = "a"} - omp.target thread_limit(%c64_i32 : i32) map_entries(%3 -> %arg0 : !fir.ref>) { + omp.target kernel_type(generic) thread_limit(%c64_i32 : i32) map_entries(%3 -> %arg0 : !fir.ref>) { %c10_i32 = arith.constant 10 : i32 %c1_i64 = arith.constant 1 : i64 %c1_i64_0 = arith.constant 1 : i64 @@ -465,7 +465,7 @@ func.func @_QPomp_target() { // CHECK: %[[UPPER:.*]] = llvm.mlir.constant(511 : index) : i64 // CHECK: %[[BOUNDS:.*]] = omp.map.bounds lower_bound(%[[LOWER]] : i64) upper_bound(%[[UPPER]] : i64) extent(%[[EXTENT]] : i64) stride(%[[STRIDE]] : i64) start_idx(%[[STRIDE]] : i64) // CHECK: %[[MAP:.*]] = omp.map.info var_ptr(%[[VAL_1]] : !llvm.ptr, !llvm.array<512 x i32>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !llvm.ptr {name = "a"} -// CHECK: omp.target thread_limit(%[[VAL_2]] : i32) map_entries(%[[MAP]] -> %[[ARG_0:.*]] : !llvm.ptr) { +// CHECK: omp.target kernel_type(generic) thread_limit(%[[VAL_2]] : i32) map_entries(%[[MAP]] -> %[[ARG_0:.*]] : !llvm.ptr) { // CHECK: %[[VAL_3:.*]] = llvm.mlir.constant(10 : i32) : i32 // CHECK: %[[VAL_4:.*]] = llvm.mlir.constant(1 : i64) : i64 // CHECK: %[[VAL_5:.*]] = llvm.mlir.constant(1 : i64) : i64 @@ -955,8 +955,8 @@ func.func @omp_map_info_derived_type_explicit_member_conversion(%arg0 : 
!fir.ref %5 = omp.map.info var_ptr(%4 : !fir.ref, f32) map_clauses(tofrom) capture(ByRef) -> !fir.ref {name = "dtype%real"} // CHECK: %[[MAP_PARENT:.*]] = omp.map.info var_ptr(%[[ARG_0]] : !llvm.ptr, !llvm.struct<"_QFderived_type", (f32, array<10 x i32>, i32)>) map_clauses(tofrom) capture(ByRef) members(%[[MAP_MEMBER_1]], %[[MAP_MEMBER_2]] : [2], [0] : !llvm.ptr, !llvm.ptr) -> !llvm.ptr {name = "dtype", partial_map = true} %6 = omp.map.info var_ptr(%arg0 : !fir.ref,int:i32}>>, !fir.type<_QFderived_type{real:f32,array:!fir.array<10xi32>,int:i32}>) map_clauses(tofrom) capture(ByRef) members(%2, %5 : [2], [0] : !fir.ref, !fir.ref) -> !fir.ref,int:i32}>> {name = "dtype", partial_map = true} - // CHECK: omp.target map_entries(%[[MAP_MEMBER_1]] -> %[[ARG_1:.*]], %[[MAP_MEMBER_2]] -> %[[ARG_2:.*]], %[[MAP_PARENT]] -> %[[ARG_3:.*]] : !llvm.ptr, !llvm.ptr, !llvm.ptr) { - omp.target map_entries(%2 -> %arg1, %5 -> %arg2, %6 -> %arg3 : !fir.ref, !fir.ref, !fir.ref,int:i32}>>) { + // CHECK: omp.target kernel_type(generic) map_entries(%[[MAP_MEMBER_1]] -> %[[ARG_1:.*]], %[[MAP_MEMBER_2]] -> %[[ARG_2:.*]], %[[MAP_PARENT]] -> %[[ARG_3:.*]] : !llvm.ptr, !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%2 -> %arg1, %5 -> %arg2, %6 -> %arg3 : !fir.ref, !fir.ref, !fir.ref,int:i32}>>) { omp.terminator } return @@ -980,8 +980,8 @@ func.func @omp_map_info_nested_derived_type_explicit_member_conversion(%arg0 : ! 
%7 = omp.map.info var_ptr(%6 : !fir.ref, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref // CHECK: %[[PARENT_MAP:.*]] = omp.map.info var_ptr(%[[ARG_0]] : !llvm.ptr, !llvm.struct<"_QFTtop_layer", (array<10 x i32>, struct<"_QFTbottom_layer", (array<10 x f32>, f64)>, i32)>) map_clauses(tofrom) capture(ByRef) members(%[[MAP_MEMBER_1]], %[[MAP_MEMBER_2]] : [1, 1], [2] : !llvm.ptr, !llvm.ptr) -> !llvm.ptr {partial_map = true} %9 = omp.map.info var_ptr(%arg0 : !fir.ref,nested:!fir.type<_QFTbottom_layer{array_i2:!fir.array<10xf32>,i2:f64}>,k:i32}>>, !fir.type<_QFTtop_layer{array_i:!fir.array<10xi32>,nested:!fir.type<_QFTbottom_layer{array_i2:!fir.array<10xf32>,i2:f64}>,k:i32}>) map_clauses(tofrom) capture(ByRef) members(%4, %7 : [1,1], [2] : !fir.ref, !fir.ref) -> !fir.ref,nested:!fir.type<_QFTbottom_layer{array_i2:!fir.array<10xf32>,i2:f64}>,k:i32}>> {partial_map = true} - // CHECK: omp.target map_entries(%[[MAP_MEMBER_1]] -> %{{.*}}, %[[MAP_MEMBER_2]] -> %{{.*}}, %[[PARENT_MAP]] -> %{{.*}} : !llvm.ptr, !llvm.ptr, !llvm.ptr) { - omp.target map_entries(%4 -> %arg1, %7 -> %arg2, %9 -> %arg3 : !fir.ref, !fir.ref, !fir.ref,nested:!fir.type<_QFTbottom_layer{array_i2:!fir.array<10xf32>,i2:f64}>,k:i32}>>) { + // CHECK: omp.target kernel_type(generic) map_entries(%[[MAP_MEMBER_1]] -> %{{.*}}, %[[MAP_MEMBER_2]] -> %{{.*}}, %[[PARENT_MAP]] -> %{{.*}} : !llvm.ptr, !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%4 -> %arg1, %7 -> %arg2, %9 -> %arg3 : !fir.ref, !fir.ref, !fir.ref,nested:!fir.type<_QFTbottom_layer{array_i2:!fir.array<10xf32>,i2:f64}>,k:i32}>>) { omp.terminator } return @@ -993,7 +993,7 @@ func.func @omp_map_info_nested_derived_type_explicit_member_conversion(%arg0 : ! 
// CHECK: %[[ADDR_OF:.*]] = llvm.mlir.addressof @var_common_ : !llvm.ptr // CHECK: %[[CB_MAP:.*]] = omp.map.info var_ptr(%[[ADDR_OF]] : !llvm.ptr, !llvm.array<8 x i8>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "var_common"} -// CHECK: omp.target map_entries(%[[CB_MAP]] -> %[[ARG0:.*]] : !llvm.ptr) { +// CHECK: omp.target kernel_type(generic) map_entries(%[[CB_MAP]] -> %[[ARG0:.*]] : !llvm.ptr) { // CHECK: %[[VAR_2_OFFSET:.*]] = llvm.mlir.constant(4 : index) : i64 // CHECK: %[[VAR_1_OFFSET:.*]] = llvm.mlir.constant(0 : index) : i64 // CHECK: %{{.*}} = llvm.getelementptr %[[ARG0]][%[[VAR_1_OFFSET]]] : (!llvm.ptr, i64) -> !llvm.ptr, i8 @@ -1002,7 +1002,7 @@ func.func @omp_map_info_nested_derived_type_explicit_member_conversion(%arg0 : ! func.func @omp_map_common_block_using_common_block_symbol() { %0 = fir.address_of(@var_common_) : !fir.ref> %1 = omp.map.info var_ptr(%0 : !fir.ref>, !fir.array<8xi8>) map_clauses(tofrom) capture(ByRef) -> !fir.ref> {name = "var_common"} - omp.target map_entries(%1 -> %arg0 : !fir.ref>) { + omp.target kernel_type(generic) map_entries(%1 -> %arg0 : !fir.ref>) { %c4 = arith.constant 4 : index %c0 = arith.constant 0 : index %c20_i32 = arith.constant 20 : i32 @@ -1033,7 +1033,7 @@ fir.global common @var_common_(dense<0> : vector<8xi8>) {alignment = 4 : i64} : // CHECK: %[[VAR_2_CB_GEP:.*]] = llvm.getelementptr %[[ADDR_OF]][%[[VAR_2_OFFSET]]] : (!llvm.ptr, i64) -> !llvm.ptr, i8 // CHECK: %[[MAP_CB_VAR_1:.*]] = omp.map.info var_ptr(%[[VAR_1_CB_GEP]] : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "var1"} // CHECK: %[[MAP_CB_VAR_2:.*]] = omp.map.info var_ptr(%[[VAR_2_CB_GEP]] : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "var2"} -// CHECK: omp.target map_entries(%[[MAP_CB_VAR_1]] -> %[[ARG0:.*]], %[[MAP_CB_VAR_2]] -> %[[ARG1:.*]] : !llvm.ptr, !llvm.ptr) { +// CHECK: omp.target kernel_type(generic) map_entries(%[[MAP_CB_VAR_1]] -> %[[ARG0:.*]], %[[MAP_CB_VAR_2]] -> 
%[[ARG1:.*]] : !llvm.ptr, !llvm.ptr) { func.func @omp_map_common_block_using_common_block_members() { %c4 = arith.constant 4 : index @@ -1047,7 +1047,7 @@ func.func @omp_map_common_block_using_common_block_members() { %6 = fir.convert %5 : (!fir.ref) -> !fir.ref %7 = omp.map.info var_ptr(%3 : !fir.ref, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref {name = "var1"} %8 = omp.map.info var_ptr(%6 : !fir.ref, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref {name = "var2"} - omp.target map_entries(%7 -> %arg0, %8 -> %arg1 : !fir.ref, !fir.ref) { + omp.target kernel_type(generic) map_entries(%7 -> %arg0, %8 -> %arg1 : !fir.ref, !fir.ref) { %c10_i32 = arith.constant 10 : i32 %9 = fir.load %arg0 : !fir.ref %10 = arith.muli %9, %c10_i32 : i32 @@ -1133,8 +1133,8 @@ func.func @map_dtype_alloca_mem(%arg0 : !fir.ref>>>, !fir.box>>) map_clauses(tofrom) capture(ByRef) -> !fir.ref>>> // CHECK: %[[MAP_PARENT_DTYPE:.*]] = omp.map.info var_ptr(%[[ARG_0]] : !llvm.ptr, [[STRUCT_TY]]) map_clauses(tofrom) capture(ByRef) members(%[[MAP_MEMBER_DESCRIPTOR]], %[[MAP_MEMBER_BADDR]] : [4], [4, 0] : !llvm.ptr, !llvm.ptr) -> !llvm.ptr {partial_map = true} %5 = omp.map.info var_ptr(%arg0 : !fir.ref>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32}>>, !fir.type<_QFRecTy{i:f32,scalar:!fir.box>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32}>) map_clauses(tofrom) capture(ByRef) members(%4, %3 : [4], [4,0] : !fir.ref>>>, !fir.llvm_ptr>>) -> !fir.ref>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32}>> {partial_map = true} - // CHECK: omp.target map_entries(%[[MAP_MEMBER_DESCRIPTOR]] -> %[[ARG_1:.*]], %[[MAP_MEMBER_BADDR]] -> %[[ARG_2:.*]], %[[MAP_PARENT_DTYPE]] -> %[[ARG_3:.*]] : !llvm.ptr, !llvm.ptr, !llvm.ptr) { - omp.target map_entries(%4 -> %arg1, %3 -> %arg2, %5 -> %arg3 : !fir.ref>>>, !fir.llvm_ptr>>, !fir.ref>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32}>>) { + // CHECK: omp.target kernel_type(generic) map_entries(%[[MAP_MEMBER_DESCRIPTOR]] 
-> %[[ARG_1:.*]], %[[MAP_MEMBER_BADDR]] -> %[[ARG_2:.*]], %[[MAP_PARENT_DTYPE]] -> %[[ARG_3:.*]] : !llvm.ptr, !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%4 -> %arg1, %3 -> %arg2, %5 -> %arg3 : !fir.ref>>>, !fir.llvm_ptr>>, !fir.ref>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32}>>) { omp.terminator } return @@ -1179,8 +1179,8 @@ func.func @map_dtype_alloca_mem2(%arg0 : !fir.ref>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32}>>>>, !fir.type<_QFRecTy{i:f32,scalar:!fir.box>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32}>) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%9 : !fir.llvm_ptr>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32}>>>) -> !fir.llvm_ptr>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32}>>> // CHECK: %[[MAP_DTYPE_PARENT_DESC:.*]] = omp.map.info var_ptr(%[[ARG_0]] : !llvm.ptr, [[DESC_TY]]) map_clauses(tofrom) capture(ByRef) members(%[[MAP_DTYPE_PARENT_BADDR]], %[[MAP_MEMBER_DESC]], %[[MAP_MEMBER_BADDR]], %[[MAP_REGULAR_MEMBER]] : [0], [0, 4], [0, 4, 0], [0, 5] : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) -> !llvm.ptr %11 = omp.map.info var_ptr(%arg0 : !fir.ref>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32}>>>>, !fir.box>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32}>>>) map_clauses(tofrom) capture(ByRef) members(%10, %5, %4, %8 : [0], [0,4], [0,4,0], [0,5] : !fir.llvm_ptr>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32}>>>, !fir.ref>>>, !fir.llvm_ptr>>, !fir.ref) -> !fir.ref>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32}>>>> - // CHECK: omp.target map_entries(%[[MAP_DTYPE_PARENT_BADDR]] -> %[[ARG_1:.*]], %[[MAP_MEMBER_DESC]] -> %[[ARG_2:.*]], %[[MAP_MEMBER_BADDR]] -> %[[ARG_3:.*]], %[[MAP_REGULAR_MEMBER]] -> %[[ARG_4:.*]], %[[MAP_DTYPE_PARENT_DESC]] -> %[[ARG_5:.*]] : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) { - omp.target map_entries(%10 -> %arg1, %5 -> %arg2, %4 -> %arg3, %8 -> %arg4, %11 -> %arg5 : 
!fir.llvm_ptr>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32}>>>, !fir.ref>>>, !fir.llvm_ptr>>, !fir.ref, !fir.ref>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32}>>>>) { + // CHECK: omp.target kernel_type(generic) map_entries(%[[MAP_DTYPE_PARENT_BADDR]] -> %[[ARG_1:.*]], %[[MAP_MEMBER_DESC]] -> %[[ARG_2:.*]], %[[MAP_MEMBER_BADDR]] -> %[[ARG_3:.*]], %[[MAP_REGULAR_MEMBER]] -> %[[ARG_4:.*]], %[[MAP_DTYPE_PARENT_DESC]] -> %[[ARG_5:.*]] : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%10 -> %arg1, %5 -> %arg2, %4 -> %arg3, %8 -> %arg4, %11 -> %arg5 : !fir.llvm_ptr>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32}>>>, !fir.ref>>>, !fir.llvm_ptr>>, !fir.ref, !fir.ref>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32}>>>>) { omp.terminator } return @@ -1231,8 +1231,8 @@ func.func @map_nested_dtype_alloca_mem(%arg0 : !fir.ref>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32,nest:!fir.type<_QFRecTy2{i:f32,array_i:!fir.array<10xi32>,array_k:!fir.box>>,k:i32}>}>>>>, !fir.type<_QFRecTy{i:f32,scalar:!fir.box>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32,nest:!fir.type<_QFRecTy2{i:f32,array_i:!fir.array<10xi32>,array_k:!fir.box>>,k:i32}>}>) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%11 : !fir.llvm_ptr>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32,nest:!fir.type<_QFRecTy2{i:f32,array_i:!fir.array<10xi32>,array_k:!fir.box>>,k:i32}>}>>>) -> !fir.llvm_ptr>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32,nest:!fir.type<_QFRecTy2{i:f32,array_i:!fir.array<10xi32>,array_k:!fir.box>>,k:i32}>}>>> // CHECK: %[[MAP_PARENT_DESC:.*]] = omp.map.info var_ptr(%[[ARG_0]] : !llvm.ptr, [[DESC_TY]]) map_clauses(tofrom) capture(ByRef) members(%[[MAP_PARENT_BADDR]], %[[MAP_NESTED_MEMBER_DESC]], %[[MAP_NESTED_MEMBER_BADDR]], %[[MAP_REGULAR_NESTED_MEMBER]] : [0], [0, 6, 2], [0, 6, 2, 0], [0, 6, 3] : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) -> !llvm.ptr 
%13 = omp.map.info var_ptr(%arg0 : !fir.ref>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32,nest:!fir.type<_QFRecTy2{i:f32,array_i:!fir.array<10xi32>,array_k:!fir.box>>,k:i32}>}>>>>, !fir.box>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32,nest:!fir.type<_QFRecTy2{i:f32,array_i:!fir.array<10xi32>,array_k:!fir.box>>,k:i32}>}>>>) map_clauses(tofrom) capture(ByRef) members(%12, %6, %5, %10 : [0], [0,6,2], [0,6,2,0], [0,6,3] : !fir.llvm_ptr>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32,nest:!fir.type<_QFRecTy2{i:f32,array_i:!fir.array<10xi32>,array_k:!fir.box>>,k:i32}>}>>>, !fir.ref>>>, !fir.llvm_ptr>>, !fir.ref) -> !fir.ref>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32,nest:!fir.type<_QFRecTy2{i:f32,array_i:!fir.array<10xi32>,array_k:!fir.box>>,k:i32}>}>>>> - // CHECK: omp.target map_entries(%[[MAP_PARENT_BADDR]] -> %[[ARG_1:.*]], %[[MAP_NESTED_MEMBER_DESC]] -> %[[ARG_2:.*]], %[[MAP_NESTED_MEMBER_BADDR]] -> %[[ARG_3:.*]], %[[MAP_REGULAR_NESTED_MEMBER]] -> %[[ARG_4:.*]], %[[MAP_PARENT_DESC]] -> %[[ARG_5:.*]] : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) { - omp.target map_entries(%12 -> %arg1, %6 -> %arg2, %5 -> %arg3, %10 -> %arg4, %13 -> %arg5 : !fir.llvm_ptr>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32,nest:!fir.type<_QFRecTy2{i:f32,array_i:!fir.array<10xi32>,array_k:!fir.box>>,k:i32}>}>>>, !fir.ref>>>, !fir.llvm_ptr>>, !fir.ref, !fir.ref>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32,nest:!fir.type<_QFRecTy2{i:f32,array_i:!fir.array<10xi32>,array_k:!fir.box>>,k:i32}>}>>>>) { + // CHECK: omp.target kernel_type(generic) map_entries(%[[MAP_PARENT_BADDR]] -> %[[ARG_1:.*]], %[[MAP_NESTED_MEMBER_DESC]] -> %[[ARG_2:.*]], %[[MAP_NESTED_MEMBER_BADDR]] -> %[[ARG_3:.*]], %[[MAP_REGULAR_NESTED_MEMBER]] -> %[[ARG_4:.*]], %[[MAP_PARENT_DESC]] -> %[[ARG_5:.*]] : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%12 -> %arg1, %6 -> %arg2, %5 -> 
%arg3, %10 -> %arg4, %13 -> %arg5 : !fir.llvm_ptr>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32,nest:!fir.type<_QFRecTy2{i:f32,array_i:!fir.array<10xi32>,array_k:!fir.box>>,k:i32}>}>>>, !fir.ref>>>, !fir.llvm_ptr>>, !fir.ref, !fir.ref>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32,nest:!fir.type<_QFRecTy2{i:f32,array_i:!fir.array<10xi32>,array_k:!fir.box>>,k:i32}>}>>>>) { omp.terminator } return @@ -1262,8 +1262,8 @@ func.func @map_nested_dtype_alloca_mem2(%arg0 : !fir.ref>>>, !fir.box>>) map_clauses(tofrom) capture(ByRef) -> !fir.ref>>> // CHECK: %[[MAP_PARENT:.*]] = omp.map.info var_ptr(%[[ARG_0]] : !llvm.ptr, [[REC_TY]]) map_clauses(tofrom) capture(ByRef) members(%[[MAP_NESTED_ALLOCATABLE_MEMBER_DESC]], %[[MAP_NESTED_ALLOCATABLE_MEMBER_BADDR]] : [6, 2], [6, 2, 0] : !llvm.ptr, !llvm.ptr) -> !llvm.ptr {partial_map = true} %6 = omp.map.info var_ptr(%arg0 : !fir.ref>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32,nest:!fir.type<_QFRecTy2{i:f32,array_i:!fir.array<10xi32>,array_k:!fir.box>>,k:i32}>}>>, !fir.type<_QFRecTy{i:f32,scalar:!fir.box>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32,nest:!fir.type<_QFRecTy2{i:f32,array_i:!fir.array<10xi32>,array_k:!fir.box>>,k:i32}>}>) map_clauses(tofrom) capture(ByRef) members(%5, %4 : [6,2], [6,2,0] : !fir.ref>>>, !fir.llvm_ptr>>) -> !fir.ref>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32,nest:!fir.type<_QFRecTy2{i:f32,array_i:!fir.array<10xi32>,array_k:!fir.box>>,k:i32}>}>> {partial_map = true} - // CHECK: omp.target map_entries(%[[MAP_NESTED_ALLOCATABLE_MEMBER_DESC]] -> %[[ARG_1:.*]], %[[MAP_NESTED_ALLOCATABLE_MEMBER_BADDR]] -> %[[ARG_2:.*]], %[[MAP_PARENT]] -> %[[ARG_3:.*]] : !llvm.ptr, !llvm.ptr, !llvm.ptr) { - omp.target map_entries(%5 -> %arg1, %4 -> %arg2, %6 -> %arg3 : !fir.ref>>>, !fir.llvm_ptr>>, !fir.ref>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32,nest:!fir.type<_QFRecTy2{i:f32,array_i:!fir.array<10xi32>,array_k:!fir.box>>,k:i32}>}>>) { + // 
CHECK: omp.target kernel_type(generic) map_entries(%[[MAP_NESTED_ALLOCATABLE_MEMBER_DESC]] -> %[[ARG_1:.*]], %[[MAP_NESTED_ALLOCATABLE_MEMBER_BADDR]] -> %[[ARG_2:.*]], %[[MAP_PARENT]] -> %[[ARG_3:.*]] : !llvm.ptr, !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%5 -> %arg1, %4 -> %arg2, %6 -> %arg3 : !fir.ref>>>, !fir.llvm_ptr>>, !fir.ref>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32,nest:!fir.type<_QFRecTy2{i:f32,array_i:!fir.array<10xi32>,array_k:!fir.box>>,k:i32}>}>>) { omp.terminator } return @@ -1298,7 +1298,7 @@ func.func @map_privatized_boxchar(%arg0 : !fir.boxchar<1>) { %7 = fir.box_offset %0 base_addr : (!fir.ref>) -> !fir.llvm_ptr>> %8 = omp.map.info var_ptr(%0 : !fir.ref>, !fir.char<1,?>) map_clauses(implicit, to) capture(ByRef) var_ptr_ptr(%7 : !fir.llvm_ptr>>) -> !fir.ref> %9 = omp.map.info var_ptr(%0 : !fir.ref>, !fir.boxchar<1>) map_clauses(to) capture(ByRef) members(%8 : [0] : !fir.ref>) -> !fir.ref> - omp.target map_entries(%9 -> %arg1, %8 -> %arg2 : !fir.ref>, !fir.ref>) private(@boxchar_privatizer %arg0 -> %arg3 [map_idx=0] : !fir.boxchar<1>) { + omp.target kernel_type(generic) map_entries(%9 -> %arg1, %8 -> %arg2 : !fir.ref>, !fir.ref>) private(@boxchar_privatizer %arg0 -> %arg3 [map_idx=0] : !fir.boxchar<1>) { omp.terminator } return @@ -1311,4 +1311,4 @@ func.func @map_privatized_boxchar(%arg0 : !fir.boxchar<1>) { // CHECK: %[[BASE_ADDR:.*]] = llvm.getelementptr %[[BOXCHAR_ALLOCA]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64)> // CHECK: %[[MAP_BASE_ADDR:.*]] = omp.map.info var_ptr(%[[BOXCHAR_ALLOCA]] : !llvm.ptr, i8) map_clauses(implicit, to) capture(ByRef) var_ptr_ptr(%[[BASE_ADDR]] : !llvm.ptr) -> !llvm.ptr // CHECK: %[[MAP_BOXCHAR:.*]] = omp.map.info var_ptr(%[[BOXCHAR_ALLOCA]] : !llvm.ptr, !llvm.struct<(ptr, i64)>) map_clauses(to) capture(ByRef) members(%[[MAP_BASE_ADDR]] : [0] : !llvm.ptr) -> !llvm.ptr -// CHECK: omp.target map_entries(%[[MAP_BOXCHAR]] -> %arg1, %[[MAP_BASE_ADDR]] -> %arg2 : 
!llvm.ptr, !llvm.ptr) private(@boxchar_privatizer %[[ARG0]] -> %arg3 [map_idx=0] : !llvm.struct<(ptr, i64)>) { +// CHECK: omp.target kernel_type(generic) map_entries(%[[MAP_BOXCHAR]] -> %arg1, %[[MAP_BASE_ADDR]] -> %arg2 : !llvm.ptr, !llvm.ptr) private(@boxchar_privatizer %[[ARG0]] -> %arg3 [map_idx=0] : !llvm.struct<(ptr, i64)>) { diff --git a/flang/test/Lower/OpenMP/DelayedPrivatization/target-private-allocatable.f90 b/flang/test/Lower/OpenMP/DelayedPrivatization/target-private-allocatable.f90 index e0fb56882c032..95dcb1595ea6b 100644 --- a/flang/test/Lower/OpenMP/DelayedPrivatization/target-private-allocatable.f90 +++ b/flang/test/Lower/OpenMP/DelayedPrivatization/target-private-allocatable.f90 @@ -75,7 +75,7 @@ end subroutine target_allocatable ! CPU: %[[MEMBER:.*]] = omp.map.info var_ptr(%[[VAR_DECL]]#0 : [[TYPE]], i32) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%[[BASE_ADDR]] : [[MEMBER_TYPE:.*]]) -> {{.*}} ! CPU: %[[MAP_VAR:.*]] = omp.map.info var_ptr(%[[VAR_DECL]]#0 : [[TYPE]], [[DESC_TYPE]]) map_clauses(always, to) capture(ByRef) members(%[[MEMBER]] : [0] : !fir.llvm_ptr>) -> !fir.ref>> -! CPU: omp.target map_entries(%[[MAP_VAR]] -> %arg0, %[[MEMBER]] -> %arg1 : [[TYPE]], [[MEMBER_TYPE]]) private( +! CPU: omp.target kernel_type(generic) map_entries(%[[MAP_VAR]] -> %arg0, %[[MEMBER]] -> %arg1 : [[TYPE]], [[MEMBER_TYPE]]) private( ! CPU-SAME: @[[VAR_PRIVATIZER_SYM]] %[[VAR_DECL]]#0 -> %{{.*}} [map_idx=0] : [[TYPE]]) { ! GPU-LABEL: omp.private {type = private} {{.*}} init { diff --git a/flang/test/Lower/OpenMP/DelayedPrivatization/target-private-implicit-scalar-map-2.f90 b/flang/test/Lower/OpenMP/DelayedPrivatization/target-private-implicit-scalar-map-2.f90 index 676686f6a2def..2e7841fd6ad97 100644 --- a/flang/test/Lower/OpenMP/DelayedPrivatization/target-private-implicit-scalar-map-2.f90 +++ b/flang/test/Lower/OpenMP/DelayedPrivatization/target-private-implicit-scalar-map-2.f90 @@ -37,4 +37,4 @@ end subroutine target_imp_capture ! 
CHECK-PROG: %[[VAL_3:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ptr>, !fir.array<25x2xf32>) map_clauses(implicit, tofrom) capture(ByRef) bounds({{.*}}) -> !fir.ptr> {name = "k"} ! CHECK-PROG: %[[VAL_4:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref, i32) map_clauses(to) capture(ByCopy) -> !fir.ref ! CHECK-PROG: %[[VAL_5:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref, i32) map_clauses(to) capture(ByCopy) -> !fir.ref -! CHECK-PROG: omp.target map_entries(%[[VAL_0]] -> %[[VAL_6:.*]], %[[VAL_1]] -> %[[VAL_7:.*]], %[[VAL_2]] -> %[[VAL_8:.*]], %[[VAL_3]] -> %[[VAL_9:.*]], %[[VAL_4]] -> %[[VAL_10:.*]], %[[VAL_5]] -> %[[VAL_11:.*]] : !fir.ref, !fir.ref>, !fir.ptr>, !fir.ptr>, !fir.ref, !fir.ref) private(@_QFtarget_imp_captureEy_firstprivate_i32 %{{.*}}#0 -> %[[VAL_12:.*]] [map_idx=4], @_QMtest_dataEz_firstprivate_i32 %{{.*}}#0 -> %[[VAL_13:.*]] [map_idx=5] : !fir.ref, !fir.ref) { +! CHECK-PROG: omp.target kernel_type(generic) map_entries(%[[VAL_0]] -> %[[VAL_6:.*]], %[[VAL_1]] -> %[[VAL_7:.*]], %[[VAL_2]] -> %[[VAL_8:.*]], %[[VAL_3]] -> %[[VAL_9:.*]], %[[VAL_4]] -> %[[VAL_10:.*]], %[[VAL_5]] -> %[[VAL_11:.*]] : !fir.ref, !fir.ref>, !fir.ptr>, !fir.ptr>, !fir.ref, !fir.ref) private(@_QFtarget_imp_captureEy_firstprivate_i32 %{{.*}}#0 -> %[[VAL_12:.*]] [map_idx=4], @_QMtest_dataEz_firstprivate_i32 %{{.*}}#0 -> %[[VAL_13:.*]] [map_idx=5] : !fir.ref, !fir.ref) { diff --git a/flang/test/Lower/OpenMP/DelayedPrivatization/target-private-implicit-scalar-map.f90 b/flang/test/Lower/OpenMP/DelayedPrivatization/target-private-implicit-scalar-map.f90 index 52a1f31cc3ce3..8ce17671d0221 100644 --- a/flang/test/Lower/OpenMP/DelayedPrivatization/target-private-implicit-scalar-map.f90 +++ b/flang/test/Lower/OpenMP/DelayedPrivatization/target-private-implicit-scalar-map.f90 @@ -25,7 +25,7 @@ ! CHECK: %[[VAL_4:.*]] = omp.map.info var_ptr(%[[VAL_3]] : !fir.ref, i32) map_clauses(to) capture(ByCopy) -> !fir.ref ! 
CHECK: %[[VAL_5:.*]] = omp.map.info var_ptr(%[[VAL_1]] : !fir.ref, i32) map_clauses(to) capture(ByCopy) -> !fir.ref -! CHECK: omp.target map_entries(%[[VAL_4]] -> %{{.*}}, %[[VAL_5]] -> %{{.*}} : !fir.ref, !fir.ref) private(@_QFExfpvx_firstprivate_i32 %[[VAL_3]] -> %[[VAL_6:.*]] [map_idx=0], @_QFExdgfx_firstprivate_i32 %[[VAL_1]] -> %[[VAL_7:.*]] [map_idx=1] : !fir.ref, !fir.ref) { +! CHECK: omp.target kernel_type(generic) map_entries(%[[VAL_4]] -> %{{.*}}, %[[VAL_5]] -> %{{.*}} : !fir.ref, !fir.ref) private(@_QFExfpvx_firstprivate_i32 %[[VAL_3]] -> %[[VAL_6:.*]] [map_idx=0], @_QFExdgfx_firstprivate_i32 %[[VAL_1]] -> %[[VAL_7:.*]] [map_idx=1] : !fir.ref, !fir.ref) { ! CHECK: %{{.*}} = fir.declare %[[VAL_6]] {uniq_name = "_QFExfpvx"} : (!fir.ref) -> !fir.ref ! CHECK: %{{.*}} = fir.declare %[[VAL_7]] {uniq_name = "_QFExdgfx"} : (!fir.ref) -> !fir.ref diff --git a/flang/test/Lower/OpenMP/DelayedPrivatization/target-private-simple.f90 b/flang/test/Lower/OpenMP/DelayedPrivatization/target-private-simple.f90 index f543068d29753..98a36ede98abd 100644 --- a/flang/test/Lower/OpenMP/DelayedPrivatization/target-private-simple.f90 +++ b/flang/test/Lower/OpenMP/DelayedPrivatization/target-private-simple.f90 @@ -21,7 +21,7 @@ end subroutine target_simple ! CHECK: %[[VAR_ALLOC:.*]] = fir.alloca i32 {bindc_name = "simple_var", {{.*}}} ! CHECK: %[[VAR_DECL:.*]]:2 = hlfir.declare %[[VAR_ALLOC]] -! CHECK: omp.target private( +! CHECK: omp.target kernel_type(generic) private( ! CHECK-SAME: @[[VAR_PRIVATIZER_SYM]] %[[VAR_DECL]]#0 -> %[[REG_ARG:.*]] : !fir.ref) { ! CHECK: %[[REG_DECL:.*]]:2 = hlfir.declare %[[REG_ARG]] ! 
CHECK: %[[C10:.*]] = arith.constant 10 diff --git a/flang/test/Lower/OpenMP/DelayedPrivatization/target-teams-private-implicit-scalar-map.f90 b/flang/test/Lower/OpenMP/DelayedPrivatization/target-teams-private-implicit-scalar-map.f90 index d476b4859a586..063352ce69fe0 100644 --- a/flang/test/Lower/OpenMP/DelayedPrivatization/target-teams-private-implicit-scalar-map.f90 +++ b/flang/test/Lower/OpenMP/DelayedPrivatization/target-teams-private-implicit-scalar-map.f90 @@ -32,7 +32,7 @@ program test_default_implicit_firstprivate !CHECK: %[[VAL_13:.*]] = omp.map.info var_ptr(%[[VAL_1]] : !fir.ref>, !fir.array<10x10x10xi32>) map_clauses(implicit, tofrom) capture(ByRef) bounds({{.*}}) -> !fir.ref> {name = "arr"} !CHECK: %[[VAL_14:.*]] = omp.map.info var_ptr(%[[VAL_6]] : !fir.ref, i32) map_clauses(to) capture(ByCopy) -> !fir.ref !CHECK: %[[VAL_15:.*]] = omp.map.info var_ptr(%[[VAL_5]] : !fir.ref, i32) map_clauses(to) capture(ByCopy) -> !fir.ref -!CHECK: omp.target host_eval({{.*}}) map_entries(%[[VAL_7]] -> %{{.*}}, %[[VAL_8]] -> %{{.*}}, %[[VAL_9]] -> %{{.*}}, %[[VAL_12]] -> %{{.*}}, %[[VAL_13]] -> %{{.*}}, %[[VAL_14]] -> %{{.*}}, %[[VAL_15]] -> %{{.*}}, %[[VAL_11]] -> %{{.*}} : {{.*}}) private(@[[SYM_XFPVX]] %[[VAL_6]] -> %{{.*}} [map_idx=5], @[[SYM_XDGFX]] %[[VAL_5]] -> %{{.*}} [map_idx=6] : {{.*}}) { +!CHECK: omp.target kernel_type(spmd) host_eval({{.*}}) map_entries(%[[VAL_7]] -> %{{.*}}, %[[VAL_8]] -> %{{.*}}, %[[VAL_9]] -> %{{.*}}, %[[VAL_12]] -> %{{.*}}, %[[VAL_13]] -> %{{.*}}, %[[VAL_14]] -> %{{.*}}, %[[VAL_15]] -> %{{.*}}, %[[VAL_11]] -> %{{.*}} : {{.*}}) private(@[[SYM_XFPVX]] %[[VAL_6]] -> %{{.*}} [map_idx=5], @[[SYM_XDGFX]] %[[VAL_5]] -> %{{.*}} [map_idx=6] : {{.*}}) { !CHECK omp.parallel private(@[[SYM_XFPVX]] %{{.*}} -> %{{.*}}, @[[SYM_XDGFX]] %{{.*}} -> %{{.*}}, @[[SYM_I]] %{{.*}} -> %{{.*}}, @[[SYM_J]] %{{.*}} -> %{{.*}}, @[[SYM_K]] %{{.*}} -> %{{.*}} : {{.*}}) { !$omp target teams distribute parallel do collapse(3) firstprivate(xfpvx) do i = 1, 10 diff 
--git a/flang/test/Lower/OpenMP/KernelLanguage/bare-clause.f90 b/flang/test/Lower/OpenMP/KernelLanguage/bare-clause.f90 index 1445c4fa225d2..4c6c1a80c1102 100644 --- a/flang/test/Lower/OpenMP/KernelLanguage/bare-clause.f90 +++ b/flang/test/Lower/OpenMP/KernelLanguage/bare-clause.f90 @@ -7,4 +7,4 @@ program test !$omp end target teams end program -! CHECK: omp.target ompx_bare +! CHECK: omp.target kernel_type(bare) diff --git a/flang/test/Lower/OpenMP/allocatable-map.f90 b/flang/test/Lower/OpenMP/allocatable-map.f90 index e1c4694d7d6b7..2f0f982035042 100644 --- a/flang/test/Lower/OpenMP/allocatable-map.f90 +++ b/flang/test/Lower/OpenMP/allocatable-map.f90 @@ -4,7 +4,7 @@ !HLFIRDIALECT: %[[BOX_OFF:.*]] = fir.box_offset %[[POINTER]]#1 base_addr : (!fir.ref>>) -> !fir.llvm_ptr> !HLFIRDIALECT: %[[POINTER_MAP_MEMBER:.*]] = omp.map.info var_ptr(%[[POINTER]]#1 : !fir.ref>>, i32) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%[[BOX_OFF]] : !fir.llvm_ptr>) -> !fir.llvm_ptr> {name = ""} !HLFIRDIALECT: %[[POINTER_MAP:.*]] = omp.map.info var_ptr(%[[POINTER]]#1 : !fir.ref>>, !fir.box>) map_clauses(always, to) capture(ByRef) members(%[[POINTER_MAP_MEMBER]] : [0] : !fir.llvm_ptr>) -> !fir.ref>> {name = "point"} -!HLFIRDIALECT: omp.target map_entries(%[[POINTER_MAP]] -> {{.*}}, %[[POINTER_MAP_MEMBER]] -> {{.*}} : !fir.ref>>, !fir.llvm_ptr>) { +!HLFIRDIALECT: omp.target kernel_type(generic) map_entries(%[[POINTER_MAP]] -> {{.*}}, %[[POINTER_MAP_MEMBER]] -> {{.*}} : !fir.ref>>, !fir.llvm_ptr>) { subroutine pointer_routine() integer, pointer :: point !$omp target map(tofrom:point) diff --git a/flang/test/Lower/OpenMP/array-bounds.f90 b/flang/test/Lower/OpenMP/array-bounds.f90 index 670784254adac..1ddbff703428c 100644 --- a/flang/test/Lower/OpenMP/array-bounds.f90 +++ b/flang/test/Lower/OpenMP/array-bounds.f90 @@ -21,7 +21,7 @@ !HOST: %[[C6:.*]] = arith.constant 4 : index !HOST: %[[BOUNDS1:.*]] = omp.map.bounds lower_bound(%[[C5]] : index) upper_bound(%[[C6]] : index) 
extent(%[[C10_0]] : index) stride(%[[C4]] : index) start_idx(%[[C4]] : index) !HOST: %[[MAP1:.*]] = omp.map.info var_ptr(%[[WRITE_DECL]]#1 : !fir.ref>, !fir.array<10xi32>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS1]]) -> !fir.ref> {name = "sp_write(2:5)"} -!HOST: omp.target map_entries(%[[MAP0]] -> %{{.*}}, %[[MAP1]] -> %{{.*}}, {{.*}} -> {{.*}} : !fir.ref>, !fir.ref>, !fir.ref) { +!HOST: omp.target kernel_type(generic) map_entries(%[[MAP0]] -> %{{.*}}, %[[MAP1]] -> %{{.*}}, {{.*}} -> {{.*}} : !fir.ref>, !fir.ref>, !fir.ref) { subroutine read_write_section() integer :: sp_read(10) = (/1,2,3,4,5,6,7,8,9,10/) @@ -53,7 +53,7 @@ module assumed_array_routines !HOST: %[[VAR_PTR_PTR:.*]] = fir.box_offset %[[INTERMEDIATE_ALLOCA]] base_addr : (!fir.ref>>) -> !fir.llvm_ptr>> !HOST: %[[MAP_INFO_MEMBER:.*]] = omp.map.info var_ptr(%[[INTERMEDIATE_ALLOCA]] : !fir.ref>>, i32) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%[[VAR_PTR_PTR]] : !fir.llvm_ptr>>) bounds(%[[BOUNDS]]) -> !fir.llvm_ptr>> {name = ""} !HOST: %[[MAP:.*]] = omp.map.info var_ptr(%[[INTERMEDIATE_ALLOCA]] : !fir.ref>>, !fir.box>) map_clauses(always, to) capture(ByRef) members(%[[MAP_INFO_MEMBER]] : [0] : !fir.llvm_ptr>>) -> !fir.ref> {name = "arr_read_write(2:5)"} -!HOST: omp.target map_entries(%[[MAP]] -> %{{.*}}, {{.*}} -> {{.*}}, %[[MAP_INFO_MEMBER]] -> %{{.*}} : !fir.ref>, !fir.ref, !fir.llvm_ptr>>) { +!HOST: omp.target kernel_type(generic) map_entries(%[[MAP]] -> %{{.*}}, {{.*}} -> {{.*}}, %[[MAP_INFO_MEMBER]] -> %{{.*}} : !fir.ref>, !fir.ref, !fir.llvm_ptr>>) { subroutine assumed_shape_array(arr_read_write) integer, intent(inout) :: arr_read_write(:) @@ -75,7 +75,7 @@ end subroutine assumed_shape_array !HOST: %[[EXT:.*]] = arith.addi %[[C4_1]], %c1{{.*}} : index !HOST: %[[BOUNDS:.*]] = omp.map.bounds lower_bound(%c1{{.*}} : index) upper_bound(%c4{{.*}} : index) extent(%[[EXT]] : index) stride(%[[DIMS0]]#2 : index) start_idx(%c1{{.*}} : index) {stride_in_bytes = true} !HOST: %[[MAP:.*]] = 
omp.map.info var_ptr(%[[ARG0_DECL]]#1 : !fir.ref>, i32) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref> {name = "arr_read_write(2:5)"} -!HOST: omp.target map_entries(%[[MAP]] -> %{{.*}}, {{.*}} -> {{.*}} : !fir.ref>, !fir.ref) { +!HOST: omp.target kernel_type(generic) map_entries(%[[MAP]] -> %{{.*}}, {{.*}} -> {{.*}} : !fir.ref>, !fir.ref) { subroutine assumed_size_array(arr_read_write) integer, intent(inout) :: arr_read_write(*) diff --git a/flang/test/Lower/OpenMP/common-block-map.f90 b/flang/test/Lower/OpenMP/common-block-map.f90 index 7c690c96ddb0b..98836306ef041 100644 --- a/flang/test/Lower/OpenMP/common-block-map.f90 +++ b/flang/test/Lower/OpenMP/common-block-map.f90 @@ -6,7 +6,7 @@ !CHECK-LABEL: func.func @_QPmap_full_block !CHECK: %[[CB_ADDR:.*]] = fir.address_of(@var_common_) : !fir.ref> !CHECK: %[[MAP:.*]] = omp.map.info var_ptr(%[[CB_ADDR]] : !fir.ref>, !fir.array<8xi8>) map_clauses(tofrom) capture(ByRef) -> !fir.ref> {name = "var_common"} -!CHECK: omp.target map_entries(%[[MAP]] -> %[[MAP_ARG:.*]] : !fir.ref>) { +!CHECK: omp.target kernel_type(generic) map_entries(%[[MAP]] -> %[[MAP_ARG:.*]] : !fir.ref>) { !CHECK: %[[INDEX:.*]] = arith.constant 0 : index !CHECK: %[[COORD:.*]] = fir.coordinate_of %[[MAP_ARG]], %[[INDEX]] : (!fir.ref>, index) -> !fir.ref !CHECK: %[[CONV2:.*]] = fir.convert %[[COORD]] : (!fir.ref) -> !fir.ref @@ -37,7 +37,7 @@ subroutine map_full_block !CHECK: %[[CB_MEMBER_2:.*]]:2 = hlfir.declare %[[CONV]] storage(%[[COMMON_BLOCK]][4]) {uniq_name = "_QFmap_mix_of_membersEvar2"} : (!fir.ref, !fir.ref>) -> (!fir.ref, !fir.ref) !CHECK: %[[MAP_EXP:.*]] = omp.map.info var_ptr(%[[CB_MEMBER_2]]#1 : !fir.ref, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref {name = "var2"} !CHECK: %[[MAP_IMP:.*]] = omp.map.info var_ptr(%[[CB_MEMBER_1]]#1 : !fir.ref, i32) map_clauses(implicit) capture(ByCopy) -> !fir.ref {name = "var1"} -!CHECK: omp.target map_entries(%[[MAP_EXP]] -> %[[ARG_EXP:.*]], %[[MAP_IMP]] -> %[[ARG_IMP:.*]] : 
!fir.ref, !fir.ref) { +!CHECK: omp.target kernel_type(generic) map_entries(%[[MAP_EXP]] -> %[[ARG_EXP:.*]], %[[MAP_IMP]] -> %[[ARG_IMP:.*]] : !fir.ref, !fir.ref) { !CHECK: %[[EXP_MEMBER:.*]]:2 = hlfir.declare %[[ARG_EXP]] {uniq_name = "_QFmap_mix_of_membersEvar2"} : (!fir.ref) -> (!fir.ref, !fir.ref) !CHECK: %[[IMP_MEMBER:.*]]:2 = hlfir.declare %[[ARG_IMP]] {uniq_name = "_QFmap_mix_of_membersEvar1"} : (!fir.ref) -> (!fir.ref, !fir.ref) subroutine map_mix_of_members @@ -53,7 +53,7 @@ subroutine map_mix_of_members !CHECK-LABEL: @_QQmain !CHECK: %[[DECL_TAR_CB:.*]] = fir.address_of(@var_common_link_) : !fir.ref> !CHECK: %[[MAP_DECL_TAR_CB:.*]] = omp.map.info var_ptr(%[[DECL_TAR_CB]] : !fir.ref>, !fir.array<8xi8>) map_clauses(tofrom) capture(ByRef) -> !fir.ref> {name = "var_common_link"} -!CHECK: omp.target map_entries(%[[MAP_DECL_TAR_CB]] -> %[[MAP_DECL_TAR_ARG:.*]] : !fir.ref>) { +!CHECK: omp.target kernel_type(generic) map_entries(%[[MAP_DECL_TAR_CB]] -> %[[MAP_DECL_TAR_ARG:.*]] : !fir.ref>) { !CHECK: %[[INDEX:.*]] = arith.constant 0 : index !CHECK: %[[COORD:.*]] = fir.coordinate_of %[[MAP_DECL_TAR_ARG]], %[[INDEX]] : (!fir.ref>, index) -> !fir.ref !CHECK: %[[CONV:.*]] = fir.convert %[[COORD]] : (!fir.ref) -> !fir.ref diff --git a/flang/test/Lower/OpenMP/declare-mapper.f90 b/flang/test/Lower/OpenMP/declare-mapper.f90 index 7eda1a4c497be..f3468dcff2bb3 100644 --- a/flang/test/Lower/OpenMP/declare-mapper.f90 +++ b/flang/test/Lower/OpenMP/declare-mapper.f90 @@ -300,7 +300,7 @@ subroutine declare_mapper_nested_parent ! Check implicit maps for deep nested allocatable payloads not covered by mapper ! CHECK-DAG: omp.map.info {{.*}} {name = "r.deep_arr.implicit_map"} - ! CHECK: omp.target + ! 
CHECK: omp.target kernel_type(generic) !$omp target map(mapper(custommapper), tofrom: r) r%real_arr = r%base_arr(1) + r%inner%deep_arr(1) !$omp end target diff --git a/flang/test/Lower/OpenMP/defaultmap.f90 b/flang/test/Lower/OpenMP/defaultmap.f90 index 2d464bc660d25..a63794dd276a3 100644 --- a/flang/test/Lower/OpenMP/defaultmap.f90 +++ b/flang/test/Lower/OpenMP/defaultmap.f90 @@ -119,7 +119,7 @@ subroutine defaultmap_scalar_implicit_mapper() ! CHECK-LABEL: func.func @_QPdefaultmap_scalar_implicit_mapper ! CHECK: %[[BASE_MAP:.*]] = omp.map.info {{.*}} map_clauses(implicit, tofrom) capture(ByRef) {{.*}} mapper(@{{.*}}) -> {{.*}} {name = ""} ! CHECK: %[[DESC_MAP:.*]] = omp.map.info {{.*}} map_clauses(always, implicit, to) capture(ByRef) members(%[[BASE_MAP]] : [0] : {{.*}}) -> {{.*}} {name = "obj"} -! CHECK: omp.target map_entries(%[[DESC_MAP]] -> {{.*}}, %[[BASE_MAP]] -> {{.*}}) +! CHECK: omp.target kernel_type(generic) map_entries(%[[DESC_MAP]] -> {{.*}}, %[[BASE_MAP]] -> {{.*}}) allocate(obj) !$omp target defaultmap(tofrom: scalar) obj%k = 40 diff --git a/flang/test/Lower/OpenMP/derived-type-allocatable-map.f90 b/flang/test/Lower/OpenMP/derived-type-allocatable-map.f90 index 74aee4df1f454..665c41a9dc7f4 100644 --- a/flang/test/Lower/OpenMP/derived-type-allocatable-map.f90 +++ b/flang/test/Lower/OpenMP/derived-type-allocatable-map.f90 @@ -8,7 +8,7 @@ !CHECK: %[[MAP_MEMBER_BASE_ADDR:.*]] = omp.map.info var_ptr(%[[MEMBER_COORD]] : !fir.ref>>>, i32) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%[[MEMBER_BASE_ADDR]] : !fir.llvm_ptr>>) bounds(%[[BOUNDS]]) -> !fir.llvm_ptr>> {{.*}} !CHECK: %[[MAP_MEMBER_DESCRIPTOR:.*]] = omp.map.info var_ptr(%[[MEMBER_COORD]] : !fir.ref>>>, !fir.box>>) map_clauses(always, to) capture(ByRef) -> !fir.ref>>> {{.*}} !CHECK: %[[MAP_PARENT:.*]] = omp.map.info var_ptr(%[[DECLARE]]#1 : !fir.ref>, !fir.type<[[ONE_LAYER_TY]]>) map_clauses(tofrom) capture(ByRef) members(%[[MAP_MEMBER_DESCRIPTOR]], %[[MAP_MEMBER_BASE_ADDR]] : [4], [4, 0] : 
!fir.ref>>>, !fir.llvm_ptr>>) -> !fir.ref> {{{.*}} partial_map = true} -!CHECK: omp.target map_entries(%[[MAP_PARENT]] -> %[[ARG0:.*]], %[[MAP_MEMBER_DESCRIPTOR]] -> %[[ARG1:.*]], %[[MAP_MEMBER_BASE_ADDR]] -> %[[ARG2:.*]] : !fir.ref>, !fir.ref>>>, !fir.llvm_ptr>>) { +!CHECK: omp.target kernel_type(generic) map_entries(%[[MAP_PARENT]] -> %[[ARG0:.*]], %[[MAP_MEMBER_DESCRIPTOR]] -> %[[ARG1:.*]], %[[MAP_MEMBER_BASE_ADDR]] -> %[[ARG2:.*]] : !fir.ref>, !fir.ref>>>, !fir.llvm_ptr>>) { !CHECK: %{{.*}}:2 = hlfir.declare %[[ARG0]] {{{.*}}} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) subroutine dtype_alloca_map_op_block() type :: one_layer @@ -43,7 +43,7 @@ subroutine dtype_alloca_map_op_block() !CHECK: %[[DTYPE_BASE_ADDR:.*]] = fir.box_offset %[[DECLARE]]#1 base_addr : (!fir.ref>>>) -> !fir.llvm_ptr>> !CHECK: %[[MAP_DTYPE_BASE_ADDR:.*]] = omp.map.info var_ptr(%[[DECLARE]]#1 : !fir.ref>>>, !fir.type<[[REC_TY]]>) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%[[DTYPE_BASE_ADDR]] : !fir.llvm_ptr>>) -> !fir.llvm_ptr>> {{.*}} !CHECK: %[[MAP_DTYPE_DESC:.*]] = omp.map.info var_ptr(%[[DECLARE]]#1 : !fir.ref>>>, !fir.box>>) map_clauses(always, to) capture(ByRef) members(%[[MAP_DTYPE_BASE_ADDR]], %[[MAP_MEMBER_DESC]], %[[MAP_MEMBER_BASE_ADDR]], %[[MAP_REGULAR_MEMBER]] : [0], [0, 4], [0, 4, 0], [0, 5] : !fir.llvm_ptr>>, !fir.ref>>>, !fir.llvm_ptr>>, !fir.ref) -> !fir.ref>>> {{.*}} -!CHECK: omp.target map_entries(%[[MAP_DTYPE_DESC]] -> %[[ARG0:.*]], %[[MAP_DTYPE_BASE_ADDR]] -> %[[ARG1:.*]], %[[MAP_MEMBER_DESC]] -> %[[ARG2:.*]], %[[MAP_MEMBER_BASE_ADDR]] -> %[[ARG3:.*]], %[[MAP_REGULAR_MEMBER]] -> %[[ARG4:.*]] : !fir.ref>>>, !fir.llvm_ptr>>, !fir.ref>>>, !fir.llvm_ptr>>, !fir.ref) { +!CHECK: omp.target kernel_type(generic) map_entries(%[[MAP_DTYPE_DESC]] -> %[[ARG0:.*]], %[[MAP_DTYPE_BASE_ADDR]] -> %[[ARG1:.*]], %[[MAP_MEMBER_DESC]] -> %[[ARG2:.*]], %[[MAP_MEMBER_BASE_ADDR]] -> %[[ARG3:.*]], %[[MAP_REGULAR_MEMBER]] -> %[[ARG4:.*]] : !fir.ref>>>, !fir.llvm_ptr>>, !fir.ref>>>, 
!fir.llvm_ptr>>, !fir.ref) { !CHECK: %{{.*}}:2 = hlfir.declare %[[ARG0]] {{{.*}}} : (!fir.ref>>>) -> (!fir.ref>>>, !fir.ref>>>) subroutine alloca_dtype_op_block_add() type :: one_layer @@ -82,7 +82,7 @@ subroutine alloca_dtype_op_block_add() !CHECK: %[[DTYPE_BASE_ADDR:.*]] = fir.box_offset %[[DECLARE]]#1 base_addr : (!fir.ref}>>>>) -> !fir.llvm_ptr}>>> !CHECK: %[[MAP_DTYPE_BASE_ADDR:.*]] = omp.map.info var_ptr(%[[DECLARE]]#1 : !fir.ref}>>>>, !fir.type<[[REC_TY]]>}>) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%[[DTYPE_BASE_ADDR]] : !fir.llvm_ptr}>>>) -> !fir.llvm_ptr}>>> {{.*}} !CHECK: %[[MAP_DTYPE:.*]] = omp.map.info var_ptr(%[[DECLARE]]#1 : !fir.ref}>>>>, !fir.box}>>>) map_clauses(always, to) capture(ByRef) members(%[[MAP_DTYPE_BASE_ADDR]], %[[MAP_NESTED_MEMBER_COORD]], %[[MAP_NESTED_MEMBER_BASE_ADDR]], %[[MAP_REGULAR_NESTED_MEMBER]] : [0], [0, 6, 2], [0, 6, 2, 0], [0, 6, 3] : !fir.llvm_ptr}>>>, !fir.ref>>>, !fir.llvm_ptr>>, !fir.ref) -> !fir.ref}>>>> {{.*}} -!CHECK: omp.target map_entries(%[[MAP_DTYPE]] -> %[[ARG0:.*]], %[[MAP_DTYPE_BASE_ADDR]] -> %[[ARG1:.*]], %[[MAP_NESTED_MEMBER_COORD]] -> %[[ARG2:.*]], %[[MAP_NESTED_MEMBER_BASE_ADDR]] -> %[[ARG3:.*]], %[[MAP_REGULAR_NESTED_MEMBER]] -> %[[ARG4:.*]] : !fir.ref}>>>>, !fir.llvm_ptr}>>>, !fir.ref>>>, !fir.llvm_ptr>>, !fir.ref) { +!CHECK: omp.target kernel_type(generic) map_entries(%[[MAP_DTYPE]] -> %[[ARG0:.*]], %[[MAP_DTYPE_BASE_ADDR]] -> %[[ARG1:.*]], %[[MAP_NESTED_MEMBER_COORD]] -> %[[ARG2:.*]], %[[MAP_NESTED_MEMBER_BASE_ADDR]] -> %[[ARG3:.*]], %[[MAP_REGULAR_NESTED_MEMBER]] -> %[[ARG4:.*]] : !fir.ref}>>>>, !fir.llvm_ptr}>>>, !fir.ref>>>, !fir.llvm_ptr>>, !fir.ref) { !CHECK: %{{.*}}:2 = hlfir.declare %[[ARG0]] {{.*}} : (!fir.ref}>>>>) -> (!fir.ref}>>>>, !fir.ref}>>>>) subroutine alloca_nest_dype_map_op_block_add() type :: middle_layer @@ -122,7 +122,7 @@ subroutine alloca_nest_dype_map_op_block_add() !CHECK: %[[MAP_NESTED_MEMBER_BASE_ADDR:.*]] = omp.map.info var_ptr(%[[NESTED_MEMBER_COORD]] : !fir.ref>>>, 
i32) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%[[NESTED_MEMBER_BASE_ADDR]] : !fir.llvm_ptr>>) bounds(%[[BOUNDS]]) -> !fir.llvm_ptr>> {{.*}} !CHECK: %[[MAP_NESTED_MEMBER_DESC:.*]] = omp.map.info var_ptr(%[[NESTED_MEMBER_COORD]] : !fir.ref>>>, !fir.box>>) map_clauses(always, to) capture(ByRef) -> !fir.ref>>> {{.*}} !CHECK: %[[MAP_PARENT:.*]] = omp.map.info var_ptr(%[[DECLARE]]#1 : !fir.ref>, !fir.type<[[REC_TY]]>) map_clauses(tofrom) capture(ByRef) members(%[[MAP_NESTED_MEMBER_DESC]], %[[MAP_NESTED_MEMBER_BASE_ADDR]] : [6, 2], [6, 2, 0] : !fir.ref>>>, !fir.llvm_ptr>>) -> !fir.ref> {{.*}} -!CHECK: omp.target map_entries(%[[MAP_PARENT]] -> %[[ARG0:.*]], %[[MAP_NESTED_MEMBER_DESC]] -> %[[ARG1:.*]], %[[MAP_NESTED_MEMBER_BASE_ADDR]] -> %[[ARG2:.*]] : !fir.ref>, !fir.ref>>>, !fir.llvm_ptr>>) { +!CHECK: omp.target kernel_type(generic) map_entries(%[[MAP_PARENT]] -> %[[ARG0:.*]], %[[MAP_NESTED_MEMBER_DESC]] -> %[[ARG1:.*]], %[[MAP_NESTED_MEMBER_BASE_ADDR]] -> %[[ARG2:.*]] : !fir.ref>, !fir.ref>>>, !fir.llvm_ptr>>) { !CHECK: %{{.*}}:2 = hlfir.declare %[[ARG0]] {{.*}} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) subroutine nest_dtype_alloca_map_op_block_add() type :: middle_layer diff --git a/flang/test/Lower/OpenMP/derived-type-map.f90 b/flang/test/Lower/OpenMP/derived-type-map.f90 index fb4b88a00d0af..004e2874d48c8 100644 --- a/flang/test/Lower/OpenMP/derived-type-map.f90 +++ b/flang/test/Lower/OpenMP/derived-type-map.f90 @@ -5,7 +5,7 @@ !CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.type<_QFmaptype_derived_implicitTscalar_and_array{real:f32,array:!fir.array<10xi32>,int:i32}> {bindc_name = "scalar_arr", uniq_name = "_QFmaptype_derived_implicitEscalar_arr"} !CHECK: %[[DECLARE:.*]]:2 = hlfir.declare %[[ALLOCA]] {uniq_name = "_QFmaptype_derived_implicitEscalar_arr"} : (!fir.ref,int:i32}>>) -> (!fir.ref,int:i32}>>, !fir.ref,int:i32}>>) !CHECK: %[[MAP:.*]] = omp.map.info var_ptr(%[[DECLARE]]#1 : !fir.ref,int:i32}>>, 
!fir.type<_QFmaptype_derived_implicitTscalar_and_array{real:f32,array:!fir.array<10xi32>,int:i32}>) map_clauses(implicit, tofrom) capture(ByRef) -> !fir.ref,int:i32}>> {name = "scalar_arr"} -!CHECK: omp.target map_entries(%[[MAP]] -> %[[ARG0:.*]] : !fir.ref,int:i32}>>) { +!CHECK: omp.target kernel_type(generic) map_entries(%[[MAP]] -> %[[ARG0:.*]] : !fir.ref,int:i32}>>) { subroutine mapType_derived_implicit type :: scalar_and_array real(4) :: real @@ -24,7 +24,7 @@ end subroutine mapType_derived_implicit !CHECK: %[[BOX_ADDR:.*]] = fir.box_offset %[[DECLARE]]#1 base_addr : (!fir.ref,int:i32}>>>>) -> !fir.llvm_ptr,int:i32}>>> !CHECK: %[[BASE_MAP:.*]] = omp.map.info var_ptr(%[[DECLARE]]#1 : !fir.ref,int:i32}>>>>, !fir.type<_QFmaptype_derived_implicit_allocatableTscalar_and_array{real:f32,array:!fir.array<10xi32>,int:i32}>) map_clauses(implicit, tofrom) capture(ByRef) var_ptr_ptr(%[[BOX_ADDR]] : !fir.llvm_ptr,int:i32}>>>) mapper(@[[MAPPER1]]) -> !fir.llvm_ptr,int:i32}>>> {name = ""} !CHECK: %[[DESC_MAP:.*]] = omp.map.info var_ptr(%[[DECLARE]]#1 : !fir.ref,int:i32}>>>>, !fir.box,int:i32}>>>) map_clauses(always, implicit, to) capture(ByRef) members(%[[BASE_MAP]] : [0] : !fir.llvm_ptr,int:i32}>>>) -> !fir.ref,int:i32}>>>> {name = "scalar_arr"} -!CHECK: omp.target map_entries(%[[DESC_MAP]] -> %[[ARG0:.*]], %[[BASE_MAP]] -> %[[ARG1:.*]] : !fir.ref,int:i32}>>>>, !fir.llvm_ptr,int:i32}>>>) { +!CHECK: omp.target kernel_type(generic) map_entries(%[[DESC_MAP]] -> %[[ARG0:.*]], %[[BASE_MAP]] -> %[[ARG1:.*]] : !fir.ref,int:i32}>>>>, !fir.llvm_ptr,int:i32}>>>) { subroutine mapType_derived_implicit_allocatable type :: scalar_and_array real(4) :: real @@ -42,7 +42,7 @@ end subroutine mapType_derived_implicit_allocatable !CHECK: %[[ALLOCA:.*]] = fir.alloca !fir.type<_QFmaptype_derived_explicitTscalar_and_array{real:f32,array:!fir.array<10xi32>,int:i32}> {bindc_name = "scalar_arr", uniq_name = "_QFmaptype_derived_explicitEscalar_arr"} !CHECK: %[[DECLARE:.*]]:2 = hlfir.declare 
%[[ALLOCA]] {uniq_name = "_QFmaptype_derived_explicitEscalar_arr"} : (!fir.ref,int:i32}>>) -> (!fir.ref,int:i32}>>, !fir.ref,int:i32}>>) !CHECK: %[[MAP:.*]] = omp.map.info var_ptr(%[[DECLARE]]#1 : !fir.ref,int:i32}>>, !fir.type<_QFmaptype_derived_explicitTscalar_and_array{real:f32,array:!fir.array<10xi32>,int:i32}>) map_clauses(tofrom) capture(ByRef) -> !fir.ref,int:i32}>> {name = "scalar_arr"} -!CHECK: omp.target map_entries(%[[MAP]] -> %[[ARG0:.*]] : !fir.ref,int:i32}>>) { +!CHECK: omp.target kernel_type(generic) map_entries(%[[MAP]] -> %[[ARG0:.*]] : !fir.ref,int:i32}>>) { subroutine mapType_derived_explicit type :: scalar_and_array real(4) :: real @@ -62,7 +62,7 @@ end subroutine mapType_derived_explicit !CHECK: %[[BOUNDS:.*]] = omp.map.bounds lower_bound(%{{.*}} : index) upper_bound(%{{.*}} : index) extent(%{{.*}} : index) stride(%{{.*}} : index) start_idx(%{{.*}} : index) !CHECK: %[[MEMBER_MAP:.*]] = omp.map.info var_ptr(%[[MEMBER]] : !fir.ref>, !fir.array<10xi32>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref> {name = "scalar_arr%array"} !CHECK: %[[PARENT_MAP:.*]] = omp.map.info var_ptr(%[[DECLARE]]#1 : !fir.ref,int:i32}>>, !fir.type<_QFmaptype_derived_explicit_single_memberTscalar_and_array{real:f32,array:!fir.array<10xi32>,int:i32}>) map_clauses(tofrom) capture(ByRef) members(%[[MEMBER_MAP]] : [1] : !fir.ref>) -> !fir.ref,int:i32}>> {name = "scalar_arr", partial_map = true} -!CHECK: omp.target map_entries(%[[PARENT_MAP]] -> %[[ARG0:.*]], %[[MEMBER_MAP]] -> %[[ARG1:.*]] : !fir.ref,int:i32}>>, !fir.ref>) { +!CHECK: omp.target kernel_type(generic) map_entries(%[[PARENT_MAP]] -> %[[ARG0:.*]], %[[MEMBER_MAP]] -> %[[ARG1:.*]] : !fir.ref,int:i32}>>, !fir.ref>) { subroutine mapType_derived_explicit_single_member type :: scalar_and_array real(4) :: real @@ -83,7 +83,7 @@ end subroutine mapType_derived_explicit_single_member !CHECK: %[[MEMBER2:.*]] = hlfir.designate %[[DECLARE]]#0{"real"} : (!fir.ref,int:i32}>>) -> !fir.ref !CHECK: 
%[[MEMBER_MAP_2:.*]] = omp.map.info var_ptr(%[[MEMBER2]] : !fir.ref, f32) map_clauses(tofrom) capture(ByRef) -> !fir.ref {name = "scalar_arr%real"} !CHECK: %[[PARENT_MAP:.*]] = omp.map.info var_ptr(%[[DECLARE]]#1 : !fir.ref,int:i32}>>, !fir.type<_QFmaptype_derived_explicit_multiple_membersTscalar_and_array{real:f32,array:!fir.array<10xi32>,int:i32}>) map_clauses(tofrom) capture(ByRef) members(%[[MEMBER_MAP_1]], %[[MEMBER_MAP_2]] : [2], [0] : !fir.ref, !fir.ref) -> !fir.ref,int:i32}>> {name = "scalar_arr", partial_map = true} -!CHECK: omp.target map_entries(%[[PARENT_MAP]] -> %[[ARG0:.*]], %[[MEMBER_MAP_1]] -> %[[ARG1:.*]], %[[MEMBER_MAP_2]] -> %[[ARG2:.*]] : !fir.ref,int:i32}>>, !fir.ref, !fir.ref) { +!CHECK: omp.target kernel_type(generic) map_entries(%[[PARENT_MAP]] -> %[[ARG0:.*]], %[[MEMBER_MAP_1]] -> %[[ARG1:.*]], %[[MEMBER_MAP_2]] -> %[[ARG2:.*]] : !fir.ref,int:i32}>>, !fir.ref, !fir.ref) { subroutine mapType_derived_explicit_multiple_members type :: scalar_and_array real(4) :: real @@ -106,7 +106,7 @@ end subroutine mapType_derived_explicit_multiple_members !CHECK: %[[BOUNDS:.*]] = omp.map.bounds lower_bound(%[[LB]] : index) upper_bound(%[[UB]] : index) extent(%{{.*}} : index) stride(%{{.*}} : index) start_idx(%{{.*}} : index) !CHECK: %[[MEMBER_MAP:.*]] = omp.map.info var_ptr(%[[MEMBER]] : !fir.ref>, !fir.array<10xi32>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref> {name = "scalar_arr%array(2:5)"} !CHECK: %[[PARENT_MAP:.*]] = omp.map.info var_ptr(%[[DECLARE]]#1 : !fir.ref,int:i32}>>, !fir.type<_QFmaptype_derived_explicit_member_with_boundsTscalar_and_array{real:f32,array:!fir.array<10xi32>,int:i32}>) map_clauses(tofrom) capture(ByRef) members(%[[MEMBER_MAP]] : [1] : !fir.ref>) -> !fir.ref,int:i32}>> {name = "scalar_arr", partial_map = true} -!CHECK: omp.target map_entries(%[[PARENT_MAP]] -> %[[ARG0:.*]], %[[MEMBER_MAP]] -> %[[ARG1:.*]] : !fir.ref,int:i32}>>, !fir.ref>) { +!CHECK: omp.target kernel_type(generic) 
map_entries(%[[PARENT_MAP]] -> %[[ARG0:.*]], %[[MEMBER_MAP]] -> %[[ARG1:.*]] : !fir.ref,int:i32}>>, !fir.ref>) { subroutine mapType_derived_explicit_member_with_bounds type :: scalar_and_array real(4) :: real @@ -127,7 +127,7 @@ end subroutine mapType_derived_explicit_member_with_bounds !CHECK: %[[BOUNDS:.*]] = omp.map.bounds lower_bound(%{{.*}} : index) upper_bound(%{{.*}} : index) extent(%{{.*}} : index) stride(%{{.*}} : index) start_idx(%{{.*}} : index) !CHECK: %[[MEMBER_MAP:.*]] = omp.map.info var_ptr(%[[NEST_MEMBER]] : !fir.ref>, !fir.array<10xi32>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref> {name = "scalar_arr%nest%array"} !CHECK: %[[PARENT_MAP:.*]] = omp.map.info var_ptr(%[[DECLARE]]#1 : {{.*}}) map_clauses(tofrom) capture(ByRef) members(%[[MEMBER_MAP]] : [2, 2] : !fir.ref>) -> {{.*}} {name = "scalar_arr", partial_map = true} -!CHECK: omp.target map_entries(%[[PARENT_MAP]] -> %[[ARG0:.*]], %[[MEMBER_MAP]] -> %[[ARG1:.*]] : {{.*}}, {{.*}}) { +!CHECK: omp.target kernel_type(generic) map_entries(%[[PARENT_MAP]] -> %[[ARG0:.*]], %[[MEMBER_MAP]] -> %[[ARG1:.*]] : {{.*}}, {{.*}}) { subroutine mapType_derived_nested_explicit_single_member type :: nested integer(4) :: int @@ -158,7 +158,7 @@ end subroutine mapType_derived_nested_explicit_single_member !CHECK: %[[NEST_MEMBER2:.*]] = hlfir.designate %[[NEST]]{"real"} : ({{.*}}) -> !fir.ref !CHECK: %[[MEMBER_MAP_2:.*]] = omp.map.info var_ptr(%[[NEST_MEMBER2]] : !fir.ref, f32) map_clauses(tofrom) capture(ByRef) -> !fir.ref {name = "scalar_arr%nest%real"} !CHECK: %[[PARENT_MAP:.*]] = omp.map.info var_ptr(%[[DECLARE]]#1 : {{.*}}, {{.*}}) map_clauses(tofrom) capture(ByRef) members(%[[MEMBER_MAP_1]], %[[MEMBER_MAP_2]] : [2, 0], [2, 1] : !fir.ref, !fir.ref) -> {{.*}} {name = "scalar_arr", partial_map = true} -!CHECK: omp.target map_entries(%[[PARENT_MAP]] -> %[[ARG0:.*]], %[[MEMBER_MAP_1]] -> %[[ARG1:.*]], %[[MEMBER_MAP_2]] -> %[[ARG2:.*]] : {{.*}}, !fir.ref, !fir.ref) { +!CHECK: omp.target 
kernel_type(generic) map_entries(%[[PARENT_MAP]] -> %[[ARG0:.*]], %[[MEMBER_MAP_1]] -> %[[ARG1:.*]], %[[MEMBER_MAP_2]] -> %[[ARG2:.*]] : {{.*}}, !fir.ref, !fir.ref) { subroutine mapType_derived_nested_explicit_multiple_members type :: nested integer(4) :: int @@ -191,7 +191,7 @@ end subroutine mapType_derived_nested_explicit_multiple_members !CHECK: %[[BOUNDS:.*]] = omp.map.bounds lower_bound(%[[C1_2]] : index) upper_bound(%[[C4]] : index) extent(%[[C10]] : index) stride(%[[C1]] : index) start_idx(%[[C1]] : index) !CHECK: %[[MEMBER_MAP:.*]] = omp.map.info var_ptr(%[[NEST_MEMBER]] : !fir.ref>, !fir.array<10xi32>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref> {name = "scalar_arr%nest%array(2:5)"} !CHECK: %[[PARENT_MAP:.*]] = omp.map.info var_ptr(%[[DECLARE]]#1 : {{.*}}, {{.*}}) map_clauses(tofrom) capture(ByRef) members(%[[MEMBER_MAP]] : [2, 2] : !fir.ref>) -> {{.*}} {name = "scalar_arr", partial_map = true} -!CHECK: omp.target map_entries(%[[PARENT_MAP]] -> %[[ARG0:.*]], %[[MEMBER_MAP]] -> %[[ARG1:.*]] : {{.*}}, !fir.ref>) { +!CHECK: omp.target kernel_type(generic) map_entries(%[[PARENT_MAP]] -> %[[ARG0:.*]], %[[MEMBER_MAP]] -> %[[ARG1:.*]] : {{.*}}, !fir.ref>) { subroutine mapType_derived_nested_explicit_member_with_bounds type :: nested integer(4) :: int @@ -225,7 +225,7 @@ end subroutine mapType_derived_nested_explicit_member_with_bounds !CHECK: %[[MAP_MEMBER_2:.*]] = omp.map.info var_ptr(%[[MEMBER_2]] : !fir.ref, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref {name = "scalar_arr2%nest%int"} !CHECK: %[[MAP_PARENT_1:.*]] = omp.map.info var_ptr(%[[DECLARE_1]]#1 : {{.*}}) map_clauses(tofrom) capture(ByRef) members(%[[MAP_MEMBER_1]] : [2, 0] : !fir.ref) -> {{.*}} {name = "scalar_arr1", partial_map = true} !CHECK: %[[MAP_PARENT_2:.*]] = omp.map.info var_ptr(%[[DECLARE_2]]#1 : {{.*}}) map_clauses(tofrom) capture(ByRef) members(%[[MAP_MEMBER_2]] : [2, 0] : !fir.ref) -> {{.*}} {name = "scalar_arr2", partial_map = true} -!CHECK: omp.target 
map_entries(%[[MAP_PARENT_1]] -> %[[ARG0:.*]], %[[MAP_PARENT_2:.*]] -> %[[ARG1:.*]], %[[MAP_MEMBER_1]] -> %[[ARG2:.*]], %[[MAP_MEMBER_2]] -> %[[ARG3:.*]] : {{.*}}, {{.*}}, !fir.ref, !fir.ref) { +!CHECK: omp.target kernel_type(generic) map_entries(%[[MAP_PARENT_1]] -> %[[ARG0:.*]], %[[MAP_PARENT_2:.*]] -> %[[ARG1:.*]], %[[MAP_MEMBER_1]] -> %[[ARG2:.*]], %[[MAP_MEMBER_2]] -> %[[ARG3:.*]] : {{.*}}, {{.*}}, !fir.ref, !fir.ref) { subroutine mapType_multilpe_derived_nested_explicit_member type :: nested integer(4) :: int diff --git a/flang/test/Lower/OpenMP/distribute-parallel-do-simd.f90 b/flang/test/Lower/OpenMP/distribute-parallel-do-simd.f90 index 120177a0420bf..2549550267b6c 100644 --- a/flang/test/Lower/OpenMP/distribute-parallel-do-simd.f90 +++ b/flang/test/Lower/OpenMP/distribute-parallel-do-simd.f90 @@ -105,7 +105,7 @@ subroutine lastprivate_cond_in_composite_construct(x_min, x_max, y_min, y_max) integer :: x_min,x_max,y_min,y_max integer :: i,j -! CHECK: omp.target {{.*}} { +! CHECK: omp.target kernel_type(spmd) {{.*}} { ! CHECK: %[[X_MAX_MAPPED:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "{{.*}}x_max"} ! CHECK: omp.teams { ! 
CHECK: omp.parallel { diff --git a/flang/test/Lower/OpenMP/local-intrinsic-sized-array-map.f90 b/flang/test/Lower/OpenMP/local-intrinsic-sized-array-map.f90 index 76dba67df5d07..181038943354f 100644 --- a/flang/test/Lower/OpenMP/local-intrinsic-sized-array-map.f90 +++ b/flang/test/Lower/OpenMP/local-intrinsic-sized-array-map.f90 @@ -12,7 +12,7 @@ !HLFIRDIALECT: %[[BOUNDS:.*]] = omp.map.bounds lower_bound({{.*}} : index) upper_bound({{.*}} : index) extent({{.*}} : index) stride({{.*}} : index) start_idx({{.*}} : index) {stride_in_bytes = true} !HLFIRDIALECT: %[[MAP_DATA_B:.*]] = omp.map.info var_ptr(%[[B_DECLARE]]#1 : !fir.ref>, f32) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref> {name = "b"} !HLFIRDIALECT: %[[MAP_DATA_SZ:.*]] = omp.map.info var_ptr(%[[SZ_DATA]] : !fir.ref, index) map_clauses(implicit) capture(ByCopy) -> !fir.ref {name = ""} -!HLFIRDIALECT: omp.target map_entries(%[[MAP_DATA_B]] -> %[[ARG1:.*]], %[[MAP_DATA_SZ]] -> %[[ARG2:.*]] : !fir.ref>, !fir.ref) { +!HLFIRDIALECT: omp.target kernel_type(generic) map_entries(%[[MAP_DATA_B]] -> %[[ARG1:.*]], %[[MAP_DATA_SZ]] -> %[[ARG2:.*]] : !fir.ref>, !fir.ref) { !HLFIRDIALECT: %[[SZ_LD:.*]] = fir.load %[[ARG2]] : !fir.ref !HLFIRDIALECT: %[[SZ_CONV:.*]] = fir.convert %[[SZ_LD]] : (index) -> i64 !HLFIRDIALECT: %[[SZ_CONV2:.*]] = fir.convert %[[SZ_CONV]] : (i64) -> index diff --git a/flang/test/Lower/OpenMP/location.f90 b/flang/test/Lower/OpenMP/location.f90 index fc7dd43499863..21cb9d50bad82 100644 --- a/flang/test/Lower/OpenMP/location.f90 +++ b/flang/test/Lower/OpenMP/location.f90 @@ -17,7 +17,7 @@ subroutine sub_parallel() !CHECK-LABEL: sub_target subroutine sub_target() print *, x -!CHECK: omp.target {{.*}} { +!CHECK: omp.target kernel_type(generic) {{.*}} { !$omp target print *, x !CHECK: omp.terminator loc(#[[TAR_LOC:.*]]) diff --git a/flang/test/Lower/OpenMP/map-character.f90 b/flang/test/Lower/OpenMP/map-character.f90 index c4197261d2099..9ab44caa38de6 100644 --- 
a/flang/test/Lower/OpenMP/map-character.f90 +++ b/flang/test/Lower/OpenMP/map-character.f90 @@ -56,7 +56,7 @@ end subroutine TestOfCharacter !CHECK-SAME: capture(ByRef) var_ptr_ptr(%[[A0_BOX_ADDR]] : !fir.llvm_ptr>>) bounds(%24) -> !fir.llvm_ptr>> {name = ""} !CHECK: %[[A0_BOXCHAR_MAP_2:.*]] = omp.map.info var_ptr(%[[A0_BOXCHAR_ALLOCA]] : !fir.ref>, !fir.boxchar<1>) map_clauses(always, implicit, to) !CHECK-SAME: capture(ByRef) members(%[[A0_BOXCHAR_MAP]] : [0] : !fir.llvm_ptr>>) -> !fir.ref> {name = ""} -!CHECK: omp.target map_entries(%[[A0_MAP]] -> %[[TGT_A0:.*]], %[[A1_MAP]] -> %[[TGT_A1:.*]], %[[A1_BOXCHAR_MAP_2]] -> %[[TGT_A1_BOXCHAR:.*]], %[[A0_BOXCHAR_MAP_2]] -> %[[TGT_A0_BOXCHAR:.*]], %[[A1_BOXCHAR_MAP]] -> %[[TGT_A1_BOXCHAR2:.*]], %[[A0_BOXCHAR_MAP]] -> %[[TGT_A0_BOXCHAR2:.*]] : !fir.ref>, !fir.ref>, !fir.ref>, !fir.ref>, !fir.llvm_ptr>>, !fir.llvm_ptr>>) { +!CHECK: omp.target kernel_type(generic) map_entries(%[[A0_MAP]] -> %[[TGT_A0:.*]], %[[A1_MAP]] -> %[[TGT_A1:.*]], %[[A1_BOXCHAR_MAP_2]] -> %[[TGT_A1_BOXCHAR:.*]], %[[A0_BOXCHAR_MAP_2]] -> %[[TGT_A0_BOXCHAR:.*]], %[[A1_BOXCHAR_MAP]] -> %[[TGT_A1_BOXCHAR2:.*]], %[[A0_BOXCHAR_MAP]] -> %[[TGT_A0_BOXCHAR2:.*]] : !fir.ref>, !fir.ref>, !fir.ref>, !fir.ref>, !fir.llvm_ptr>>, !fir.llvm_ptr>>) { !CHECK: %[[TGT_A0_BC_LD:.*]] = fir.load %[[TGT_A0_BOXCHAR]] : !fir.ref> !CHECK: %[[TGT_A1_BC_LD:.*]] = fir.load %[[TGT_A1_BOXCHAR]] : !fir.ref> !CHECK: %[[UNBOXED_TGT_A1:.*]]:2 = fir.unboxchar %[[TGT_A1_BC_LD]] : (!fir.boxchar<1>) -> (!fir.ref>, index) diff --git a/flang/test/Lower/OpenMP/map-component-ref.f90 b/flang/test/Lower/OpenMP/map-component-ref.f90 index b7a7ee06b02f2..b0ec9ec4939d6 100644 --- a/flang/test/Lower/OpenMP/map-component-ref.f90 +++ b/flang/test/Lower/OpenMP/map-component-ref.f90 @@ -7,7 +7,7 @@ ! CHECK: %[[V2:[0-9]+]] = hlfir.designate %[[V1]]#0{"a1"} : (!fir.ref>) -> !fir.ref ! 
CHECK: %[[V3:[0-9]+]] = omp.map.info var_ptr(%[[V2]] : !fir.ref, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref {name = "a%a1"} ! CHECK: %[[V4:[0-9]+]] = omp.map.info var_ptr(%[[V1]]#1 : !fir.ref>, !fir.type<_QFfoo1Tt0{a0:i32,a1:i32}>) map_clauses(tofrom) capture(ByRef) members(%[[V3]] : [1] : !fir.ref) -> !fir.ref> {name = "a", partial_map = true} -! CHECK: omp.target map_entries(%[[V4]] -> %arg0, %[[V3]] -> %arg1 : !fir.ref>, !fir.ref) { +! CHECK: omp.target kernel_type(generic) map_entries(%[[V4]] -> %arg0, %[[V3]] -> %arg1 : !fir.ref>, !fir.ref) { ! CHECK: %[[V5:[0-9]+]]:2 = hlfir.declare %arg0 {uniq_name = "_QFfoo1Ea"} : (!fir.ref>) -> (!fir.ref>, !fir.ref>) ! CHECK: %c0_i32 = arith.constant 0 : i32 ! CHECK: %[[V6:[0-9]+]] = hlfir.designate %[[V5]]#0{"a1"} : (!fir.ref>) -> !fir.ref diff --git a/flang/test/Lower/OpenMP/map-descriptor-deferral.f90 b/flang/test/Lower/OpenMP/map-descriptor-deferral.f90 index a7165c391f1af..ebac416facc50 100644 --- a/flang/test/Lower/OpenMP/map-descriptor-deferral.f90 +++ b/flang/test/Lower/OpenMP/map-descriptor-deferral.f90 @@ -24,7 +24,7 @@ subroutine assume_map_target_enter_exit(assumed_arr) !CHECK: %[[BOX_ADDR:.*]] = fir.box_offset %{{.*}} base_addr : (!fir.ref>>) -> !fir.llvm_ptr>> !CHECK: %[[MAP_ADDR:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref>>, i32) map_clauses(implicit, tofrom) capture(ByRef) var_ptr_ptr(%[[BOX_ADDR]] : !fir.llvm_ptr>>) bounds(%{{.*}}) -> !fir.llvm_ptr>> {name = ""} !CHECK: %[[MAP_BOX:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref>>, !fir.box>) map_clauses(always, implicit, to) capture(ByRef) members(%{{.*}} : [0] : !fir.llvm_ptr>>) -> !fir.ref> {name = "assumed_arr"} -!CHECK: omp.target map_entries(%[[MAP_BOX]] -> %{{.*}}, %[[MAP_ADDR]] -> %{{.*}} : !fir.ref>, !fir.llvm_ptr>>) { +!CHECK: omp.target kernel_type(generic) map_entries(%[[MAP_BOX]] -> %{{.*}}, %[[MAP_ADDR]] -> %{{.*}} : !fir.ref>, !fir.llvm_ptr>>) { !CHECK: %[[BOX_ADDR:.*]] = fir.box_offset %{{.*}} base_addr : (!fir.ref>>) -> 
!fir.llvm_ptr>> !CHECK: %[[LOAD_BOX:.*]] = fir.load %[[BOX_ADDR]] : !fir.llvm_ptr>> !CHECK: %[[MAP_ADDR:.*]] = omp.map.info var_ptr(%[[LOAD_BOX]] : !fir.ref>, i32) map_clauses(from) capture(ByRef) bounds(%{{.*}}) -> !fir.ref> {name = "assumed_arr"} @@ -47,7 +47,7 @@ subroutine assume_alloca_map_target_enter_exit(assumed_arr) !CHECK: %[[BOX_ADDR:.*]] = fir.box_offset %{{.*}} base_addr : (!fir.ref>>>) -> !fir.llvm_ptr>> !CHECK: %[[BOX_ADDR_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref>>>, i32) map_clauses(implicit, tofrom) capture(ByRef) var_ptr_ptr(%[[BOX_ADDR]] : !fir.llvm_ptr>>) bounds(%{{.*}}) -> !fir.llvm_ptr>> {name = ""} !CHECK: %[[DESC_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref>>>, !fir.box>>) map_clauses(always, implicit, to) capture(ByRef) members(%[[BOX_ADDR_MAP]] : [0] : !fir.llvm_ptr>>) -> !fir.ref>>> {name = "assumed_arr"} -!CHECK: omp.target map_entries(%[[DESC_MAP]] -> %[[VAL_28:.*]], %[[BOX_ADDR_MAP]] -> %[[VAL_29:.*]] : !fir.ref>>>, !fir.llvm_ptr>>) { +!CHECK: omp.target kernel_type(generic) map_entries(%[[DESC_MAP]] -> %[[VAL_28:.*]], %[[BOX_ADDR_MAP]] -> %[[VAL_29:.*]] : !fir.ref>>>, !fir.llvm_ptr>>) { !CHECK: %[[BOX_ADDR:.*]] = fir.box_offset %{{.*}} base_addr : (!fir.ref>>>) -> !fir.llvm_ptr>> !CHECK: %[[BOX_ADDR_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref>>>, i32) map_clauses(from) capture(ByRef) var_ptr_ptr(%[[BOX_ADDR]] : !fir.llvm_ptr>>) bounds(%{{.*}}) -> !fir.llvm_ptr>> {name = ""} !CHECK: %[[DESC_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref>>>, !fir.box>>) map_clauses(from) capture(ByRef) members(%[[BOX_ADDR_MAP]] : [0] : !fir.llvm_ptr>>) -> !fir.ref>>> {name = "assumed_arr"} @@ -70,7 +70,7 @@ subroutine assume_pointer_map_target_enter_exit(assumed_arr) !CHECK: %[[BOX_ADDR:.*]] = fir.box_offset %{{.*}} base_addr : (!fir.ref>>>) -> !fir.llvm_ptr>> !CHECK: %[[BOX_ADDR_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref>>>, i32) map_clauses(implicit, tofrom) capture(ByRef) var_ptr_ptr(%[[BOX_ADDR]] : !fir.llvm_ptr>>) 
bounds(%{{.*}}) -> !fir.llvm_ptr>> {name = ""} !CHECK: %[[DESC_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref>>>, !fir.box>>) map_clauses(always, implicit, to) capture(ByRef) members(%[[BOX_ADDR_MAP]] : [0] : !fir.llvm_ptr>>) -> !fir.ref>>> {name = "assumed_arr"} -!CHECK: omp.target map_entries(%[[DESC_MAP]] -> %[[VAL_28:.*]], %[[BOX_ADDR_MAP]] -> %[[VAL_29:.*]] : !fir.ref>>>, !fir.llvm_ptr>>) { +!CHECK: omp.target kernel_type(generic) map_entries(%[[DESC_MAP]] -> %[[VAL_28:.*]], %[[BOX_ADDR_MAP]] -> %[[VAL_29:.*]] : !fir.ref>>>, !fir.llvm_ptr>>) { !CHECK: %[[BOX_ADDR:.*]] = fir.box_offset %{{.*}} base_addr : (!fir.ref>>>) -> !fir.llvm_ptr>> !CHECK: %[[BOX_ADDR_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref>>>, i32) map_clauses(from) capture(ByRef) var_ptr_ptr(%[[BOX_ADDR]] : !fir.llvm_ptr>>) bounds(%{{.*}}) -> !fir.llvm_ptr>> {name = ""} !CHECK: %[[DESC_MAP:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref>>>, !fir.box>>) map_clauses(from) capture(ByRef) members(%[[BOX_ADDR_MAP]] : [0] : !fir.llvm_ptr>>) -> !fir.ref>>> {name = "assumed_arr"} @@ -93,4 +93,4 @@ subroutine assume_map_target_data(assumed_arr) !CHECK: %[[BOX_ADDR:.*]] = fir.box_offset %{{.*}} base_addr : (!fir.ref>>) -> !fir.llvm_ptr>> !CHECK: %[[MAP_ADDR:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref>>, i32) map_clauses(implicit, tofrom) capture(ByRef) var_ptr_ptr(%[[BOX_ADDR]] : !fir.llvm_ptr>>) bounds(%{{.*}}) -> !fir.llvm_ptr>> {name = ""} !CHECK: %[[MAP_BOX:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref>>, !fir.box>) map_clauses(always, implicit, to) capture(ByRef) members(%[[MAP_ADDR]] : [0] : !fir.llvm_ptr>>) -> !fir.ref> {name = "assumed_arr"} -!CHECK: omp.target map_entries(%[[MAP_BOX]] -> %{{.*}}, %[[MAP_ADDR]] -> %{{.*}} : !fir.ref>, !fir.llvm_ptr>>) { +!CHECK: omp.target kernel_type(generic) map_entries(%[[MAP_BOX]] -> %{{.*}}, %[[MAP_ADDR]] -> %{{.*}} : !fir.ref>, !fir.llvm_ptr>>) { diff --git a/flang/test/Lower/OpenMP/map-mapper.f90 b/flang/test/Lower/OpenMP/map-mapper.f90 index 
8934fbb5d6edf..f5823dff717dd 100644 --- a/flang/test/Lower/OpenMP/map-mapper.f90 +++ b/flang/test/Lower/OpenMP/map-mapper.f90 @@ -13,7 +13,7 @@ program p type(t1) :: a, b !CHECK: %[[MAP_A:.*]] = omp.map.info var_ptr(%{{.*}} : {{.*}}, {{.*}}) map_clauses(tofrom) capture(ByRef) mapper(@_QQFxx) -> {{.*}} {name = "a"} - !CHECK: omp.target map_entries(%[[MAP_A]] -> %{{.*}}, %{{.*}} -> %{{.*}} : {{.*}}, {{.*}}) { + !CHECK: omp.target kernel_type(generic) map_entries(%[[MAP_A]] -> %{{.*}}, %{{.*}} -> %{{.*}} : {{.*}}, {{.*}}) { !$omp target map(mapper(xx) : a) do i = 1, n a%x(i) = i @@ -21,7 +21,7 @@ program p !$omp end target !CHECK: %[[MAP_B:.*]] = omp.map.info var_ptr(%{{.*}} : {{.*}}, {{.*}}) map_clauses(tofrom) capture(ByRef) mapper(@_QQFt1_omp_default_mapper) -> {{.*}} {name = "b"} - !CHECK: omp.target map_entries(%[[MAP_B]] -> %{{.*}}, %{{.*}} -> %{{.*}} : {{.*}}, {{.*}}) { + !CHECK: omp.target kernel_type(generic) map_entries(%[[MAP_B]] -> %{{.*}}, %{{.*}} -> %{{.*}} : {{.*}}, {{.*}}) { !$omp target map(mapper(default) : b) do i = 1, n b%x(i) = i diff --git a/flang/test/Lower/OpenMP/optional-argument-map-2.f90 b/flang/test/Lower/OpenMP/optional-argument-map-2.f90 index 7b67fd3cad379..9044564d89724 100644 --- a/flang/test/Lower/OpenMP/optional-argument-map-2.f90 +++ b/flang/test/Lower/OpenMP/optional-argument-map-2.f90 @@ -74,7 +74,7 @@ end module mod ! CHECK-FPRIV: %[[VAL_16:.*]] = fir.box_offset %[[VAL_0]] base_addr : (!fir.ref>) -> !fir.llvm_ptr>> ! CHECK-FPRIV: %[[VAL_17:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref>, !fir.char<1,?>) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%[[VAL_16]] : !fir.llvm_ptr>>) bounds(%[[VAL_14]]) -> !fir.llvm_ptr>> {name = ""} ! CHECK-FPRIV: %[[VAL_18:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref>, !fir.boxchar<1>) map_clauses(always, to) capture(ByRef) members(%[[VAL_17]] : [0] : !fir.llvm_ptr>>) -> !fir.ref> -! 
CHECK-FPRIV: omp.target map_entries(%[[VAL_7]] -> %[[VAL_19:.*]], %[[VAL_18]] -> %[[VAL_20:.*]], %[[VAL_17]] -> %[[VAL_21:.*]] : !fir.ref>, !fir.ref>, !fir.llvm_ptr>>) private(@_QMmodFroutine_boxcharEa_firstprivate_boxchar_c8xU %[[VAL_3]]#0 -> %[[VAL_22:.*]] [map_idx=1] : !fir.boxchar<1>) { +! CHECK-FPRIV: omp.target kernel_type(generic) map_entries(%[[VAL_7]] -> %[[VAL_19:.*]], %[[VAL_18]] -> %[[VAL_20:.*]], %[[VAL_17]] -> %[[VAL_21:.*]] : !fir.ref>, !fir.ref>, !fir.llvm_ptr>>) private(@_QMmodFroutine_boxcharEa_firstprivate_boxchar_c8xU %[[VAL_3]]#0 -> %[[VAL_22:.*]] [map_idx=1] : !fir.boxchar<1>) { ! CHECK-FPRIV: %[[VAL_23:.*]] = arith.constant 4 : index ! CHECK-FPRIV: %[[VAL_24:.*]]:2 = hlfir.declare %[[VAL_19]] typeparams %[[VAL_23]] {uniq_name = "_QMmodFroutine_boxcharEb"} : (!fir.ref>, index) -> (!fir.ref>, !fir.ref>) ! CHECK-FPRIV: %[[VAL_25:.*]]:2 = fir.unboxchar %[[VAL_22]] : (!fir.boxchar<1>) -> (!fir.ref>, index) @@ -105,7 +105,7 @@ end module mod ! CHECK-NO-FPRIV: %[[VAL_22:.*]] = fir.box_offset %[[VAL_0]] base_addr : (!fir.ref>) -> !fir.llvm_ptr>> ! CHECK-NO-FPRIV: %[[VAL_23:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref>, !fir.char<1,?>) map_clauses(implicit, to) capture(ByRef) var_ptr_ptr(%[[VAL_22]] : !fir.llvm_ptr>>) bounds(%14) -> !fir.llvm_ptr>> {name = ""} ! CHECK-NO-FPRIV: %[[VAL_24:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref>, !fir.boxchar<1>) map_clauses(always, implicit, to) capture(ByRef) members(%[[VAL_23]] : [0] : !fir.llvm_ptr>>) -> !fir.ref> {name = ""} -! CHECK-NO-FPRIV: omp.target map_entries(%[[VAL_7]] -> %[[VAL_25:.*]], %[[VAL_16]] -> %[[VAL_26:.*]], %[[VAL_24]] -> %[[VAL_27:.*]], %[[VAL_23]] -> %[[VAL_28:.*]] : !fir.ref>, !fir.ref>, !fir.ref>, !fir.llvm_ptr>>) { +! CHECK-NO-FPRIV: omp.target kernel_type(generic) map_entries(%[[VAL_7]] -> %[[VAL_25:.*]], %[[VAL_16]] -> %[[VAL_26:.*]], %[[VAL_24]] -> %[[VAL_27:.*]], %[[VAL_23]] -> %[[VAL_28:.*]] : !fir.ref>, !fir.ref>, !fir.ref>, !fir.llvm_ptr>>) { ! 
CHECK-NO-FPRIV: %[[VAL_29:.*]] = fir.load %[[VAL_27]] : !fir.ref> ! CHECK-NO-FPRIV: %[[VAL_30:.*]]:2 = fir.unboxchar %[[VAL_29]] : (!fir.boxchar<1>) -> (!fir.ref>, index) ! CHECK-NO-FPRIV: %[[VAL_31:.*]] = arith.constant 4 : index diff --git a/flang/test/Lower/OpenMP/optional-argument-map-3.f90 b/flang/test/Lower/OpenMP/optional-argument-map-3.f90 index 4dab002ca175a..dc927df18d3ef 100644 --- a/flang/test/Lower/OpenMP/optional-argument-map-3.f90 +++ b/flang/test/Lower/OpenMP/optional-argument-map-3.f90 @@ -34,7 +34,7 @@ end subroutine foo ! CHECK: %[[VAL_3:.*]] = fir.box_offset %[[VAL_0]] base_addr : (!fir.ref>>) -> !fir.llvm_ptr>> ! CHECK: %[[VAL_4:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref>>, f32) map_clauses(implicit, tofrom) capture(ByRef) var_ptr_ptr(%[[VAL_3]] : !fir.llvm_ptr>>) bounds(%{{.*}}) -> !fir.llvm_ptr>> {name = ""} ! CHECK: %[[VAL_5:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref>>, !fir.box>) map_clauses(always, implicit, to) capture(ByRef) members(%[[VAL_4]] : [0] : !fir.llvm_ptr>>) -> !fir.ref> {name = "dt"} -! CHECK: omp.target host_eval({{.*}}) map_entries({{.*}}%[[VAL_5]] -> {{.*}}, %[[VAL_4]] -> {{.*}} : {{.*}}) { +! CHECK: omp.target kernel_type(spmd) host_eval({{.*}}) map_entries({{.*}}%[[VAL_5]] -> {{.*}}, %[[VAL_4]] -> {{.*}} : {{.*}}) { ! CHECK: } else { ! CHECK: %[[VAL_6:.*]] = fir.is_present %[[VAL_1]]#1 : (!fir.box>) -> i1 ! CHECK: fir.if %[[VAL_6]] { @@ -43,4 +43,4 @@ end subroutine foo ! CHECK: %[[VAL_7:.*]] = fir.box_offset %[[VAL_0]] base_addr : (!fir.ref>>) -> !fir.llvm_ptr>> ! CHECK: %[[VAL_8:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref>>, f32) map_clauses(implicit, tofrom) capture(ByRef) var_ptr_ptr(%[[VAL_7]] : !fir.llvm_ptr>>) bounds(%{{.*}}) -> !fir.llvm_ptr>> {name = ""} ! CHECK: %[[VAL_9:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref>>, !fir.box>) map_clauses(always, implicit, to) capture(ByRef) members(%[[VAL_8]] : [0] : !fir.llvm_ptr>>) -> !fir.ref> {name = "dt"} -! 
CHECK: omp.target host_eval({{.*}}) map_entries({{.*}}, %[[VAL_9]] ->{{.*}}, %[[VAL_8]] -> {{.*}} : {{.*}}) { +! CHECK: omp.target kernel_type(spmd) host_eval({{.*}}) map_entries({{.*}}, %[[VAL_9]] ->{{.*}}, %[[VAL_8]] -> {{.*}} : {{.*}}) { diff --git a/flang/test/Lower/OpenMP/target-map-complex.f90 b/flang/test/Lower/OpenMP/target-map-complex.f90 index fc01bdafe51ed..0f7db36298835 100644 --- a/flang/test/Lower/OpenMP/target-map-complex.f90 +++ b/flang/test/Lower/OpenMP/target-map-complex.f90 @@ -12,10 +12,10 @@ !CHECK: %[[V1:[0-9]+]]:2 = hlfir.declare {{.*}} (!fir.ref>) -> (!fir.ref>, !fir.ref>) !CHECK-FPRIV: %[[V2:[0-9]+]] = omp.map.info var_ptr(%[[V1]]#0 : !fir.ref>, complex) {{.*}} capture(ByCopy) !CHECK-FPRIV: %[[V3:[0-9]+]] = omp.map.info var_ptr(%[[V0]]#0 : !fir.ref>, complex) {{.*}} capture(ByRef) -!CHECK-FPRIV: omp.target map_entries(%[[V2]] -> {{.*}}, %[[V3]] -> {{.*}} : !fir.ref>, !fir.ref>) private(@[[PRIV_32]] %[[V1]]#0 -> %{{.*}} [map_idx=0], @[[PRIV_64]] %[[V0]]#0 -> %{{.*}} [map_idx=1] : !fir.ref>, !fir.ref>) { +!CHECK-FPRIV: omp.target kernel_type(generic) map_entries(%[[V2]] -> {{.*}}, %[[V3]] -> {{.*}} : !fir.ref>, !fir.ref>) private(@[[PRIV_32]] %[[V1]]#0 -> %{{.*}} [map_idx=0], @[[PRIV_64]] %[[V0]]#0 -> %{{.*}} [map_idx=1] : !fir.ref>, !fir.ref>) { !CHECK-NO-FPRIV: %[[V2:[0-9]+]] = omp.map.info var_ptr(%[[V1]]#1 : !fir.ref>, complex) {{.*}} capture(ByCopy) !CHECK-NO-FPRIV: %[[V3:[0-9]+]] = omp.map.info var_ptr(%[[V0]]#1 : !fir.ref>, complex) {{.*}} capture(ByRef) -!CHECK-NO-PRIV: omp.target map_entries(%[[V2]] -> {{.*}}, %[[V3]] -> {{.*}} : !fir.ref>, !fir.ref>) +!CHECK-NO-PRIV: omp.target kernel_type(generic) map_entries(%[[V2]] -> {{.*}}, %[[V3]] -> {{.*}} : !fir.ref>, !fir.ref>) module m implicit none diff --git a/flang/test/Lower/OpenMP/target-parallel-private.f90 b/flang/test/Lower/OpenMP/target-parallel-private.f90 index cc04b77e4a527..f1b7959cee83b 100644 --- a/flang/test/Lower/OpenMP/target-parallel-private.f90 +++ 
b/flang/test/Lower/OpenMP/target-parallel-private.f90 @@ -15,7 +15,7 @@ subroutine target_parallel_private() ! CHECK: omp.private {type = private} @[[PRIVATIZER:.*]] : {{.*}} -! CHECK: omp.target {{.*}} { +! CHECK: omp.target kernel_type(generic) {{.*}} { ! CHECK: omp.parallel private(@[[PRIVATIZER]] %{{.*}} -> %{{.*}} : {{.*}}) { ! CHECK: } ! CHECK: } diff --git a/flang/test/Lower/OpenMP/target-spmd.f90 b/flang/test/Lower/OpenMP/target-spmd.f90 index 320bd1d26ccd3..88e0a4c9c7d58 100644 --- a/flang/test/Lower/OpenMP/target-spmd.f90 +++ b/flang/test/Lower/OpenMP/target-spmd.f90 @@ -207,27 +207,27 @@ subroutine omp_target_teams_device !$omp target teams device(dev32) !$omp end target teams ! CHECK: %[[DEV32:.*]] = fir.load %{{.*}} : !fir.ref - ! CHECK: omp.target device(%[[DEV32]] : i32) + ! CHECK: omp.target kernel_type(generic) device(%[[DEV32]] : i32) !$omp target teams device(dev64) !$omp end target teams ! CHECK: %[[DEV64:.*]] = fir.load %{{.*}} : !fir.ref - ! CHECK: omp.target device(%[[DEV64]] : i64) + ! CHECK: omp.target kernel_type(generic) device(%[[DEV64]] : i64) !$omp target teams device(dev16) !$omp end target teams ! CHECK: %[[DEV16:.*]] = fir.load %{{.*}} : !fir.ref - ! CHECK: omp.target device(%[[DEV16]] : i16) + ! CHECK: omp.target kernel_type(generic) device(%[[DEV16]] : i16) !$omp target teams device(2) !$omp end target teams ! CHECK: %[[C2:.*]] = arith.constant 2 : i32 - ! CHECK: omp.target device(%[[C2]] : i32) + ! CHECK: omp.target kernel_type(generic) device(%[[C2]] : i32) !$omp target teams device(5_8) !$omp end target teams ! CHECK: %[[C5:.*]] = arith.constant 5 : i64 - ! CHECK: omp.target device(%[[C5]] : i64) + ! CHECK: omp.target kernel_type(generic) device(%[[C5]] : i64) end subroutine omp_target_teams_device @@ -251,7 +251,7 @@ subroutine omp_target_teams_distribute_device end do !$omp end target teams distribute ! CHECK: %[[DEV32:.*]] = fir.load %{{.*}} : !fir.ref - ! CHECK: omp.target device(%[[DEV32]] : i32) + ! 
CHECK: omp.target kernel_type(generic) device(%[[DEV32]] : i32) ! CHECK: omp.teams ! CHECK: omp.distribute ! CHECK: omp.loop_nest @@ -261,28 +261,28 @@ subroutine omp_target_teams_distribute_device end do !$omp end target teams distribute ! CHECK: %[[DEV64:.*]] = fir.load %{{.*}} : !fir.ref - ! CHECK: omp.target device(%[[DEV64]] : i64) + ! CHECK: omp.target kernel_type(generic) device(%[[DEV64]] : i64) !$omp target teams distribute device(dev16) do i = 1, 1 end do !$omp end target teams distribute ! CHECK: %[[DEV16:.*]] = fir.load %{{.*}} : !fir.ref - ! CHECK: omp.target device(%[[DEV16]] : i16) + ! CHECK: omp.target kernel_type(generic) device(%[[DEV16]] : i16) !$omp target teams distribute device(2) do i = 1, 1 end do !$omp end target teams distribute ! CHECK: %[[C2:.*]] = arith.constant 2 : i32 - ! CHECK: omp.target device(%[[C2]] : i32) + ! CHECK: omp.target kernel_type(generic) device(%[[C2]] : i32) !$omp target teams distribute device(5_8) do i = 1, 1 end do !$omp end target teams distribute ! CHECK: %[[C5:.*]] = arith.constant 5 : i64 - ! CHECK: omp.target device(%[[C5]] : i64) + ! CHECK: omp.target kernel_type(generic) device(%[[C5]] : i64) end subroutine omp_target_teams_distribute_device @@ -306,7 +306,7 @@ subroutine omp_target_teams_distribute_parallel_do_device end do !$omp end target teams distribute parallel do ! CHECK: %[[DEV32:.*]] = fir.load %{{.*}} : !fir.ref - ! CHECK: omp.target device(%[[DEV32]] : i32) + ! CHECK: omp.target kernel_type(spmd) device(%[[DEV32]] : i32) ! CHECK: omp.teams ! CHECK: omp.parallel ! CHECK: omp.distribute @@ -318,28 +318,28 @@ subroutine omp_target_teams_distribute_parallel_do_device end do !$omp end target teams distribute parallel do ! CHECK: %[[DEV64:.*]] = fir.load %{{.*}} : !fir.ref - ! CHECK: omp.target device(%[[DEV64]] : i64) + ! CHECK: omp.target kernel_type(spmd) device(%[[DEV64]] : i64) !$omp target teams distribute parallel do device(dev16) do i = 1, 1 end do !$omp end target teams distribute parallel do ! 
CHECK: %[[DEV16:.*]] = fir.load %{{.*}} : !fir.ref - ! CHECK: omp.target device(%[[DEV16]] : i16) + ! CHECK: omp.target kernel_type(spmd) device(%[[DEV16]] : i16) !$omp target teams distribute parallel do device(2) do i = 1, 1 end do !$omp end target teams distribute parallel do ! CHECK: %[[C2:.*]] = arith.constant 2 : i32 - ! CHECK: omp.target device(%[[C2]] : i32) + ! CHECK: omp.target kernel_type(spmd) device(%[[C2]] : i32) !$omp target teams distribute parallel do device(5_8) do i = 1, 1 end do !$omp end target teams distribute parallel do ! CHECK: %[[C5:.*]] = arith.constant 5 : i64 - ! CHECK: omp.target device(%[[C5]] : i64) + ! CHECK: omp.target kernel_type(spmd) device(%[[C5]] : i64) end subroutine omp_target_teams_distribute_parallel_do_device @@ -363,7 +363,7 @@ subroutine omp_target_teams_distribute_parallel_do_simd_device end do !$omp end target teams distribute parallel do simd ! CHECK: %[[DEV32:.*]] = fir.load %{{.*}} : !fir.ref - ! CHECK: omp.target device(%[[DEV32]] : i32) + ! CHECK: omp.target kernel_type(spmd) device(%[[DEV32]] : i32) ! CHECK: omp.teams ! CHECK: omp.parallel ! CHECK: omp.distribute @@ -376,27 +376,27 @@ subroutine omp_target_teams_distribute_parallel_do_simd_device end do !$omp end target teams distribute parallel do simd ! CHECK: %[[DEV64:.*]] = fir.load %{{.*}} : !fir.ref - ! CHECK: omp.target device(%[[DEV64]] : i64) + ! CHECK: omp.target kernel_type(spmd) device(%[[DEV64]] : i64) !$omp target teams distribute parallel do simd device(dev16) do i = 1, 1 end do !$omp end target teams distribute parallel do simd ! CHECK: %[[DEV16:.*]] = fir.load %{{.*}} : !fir.ref - ! CHECK: omp.target device(%[[DEV16]] : i16) + ! CHECK: omp.target kernel_type(spmd) device(%[[DEV16]] : i16) !$omp target teams distribute parallel do simd device(2) do i = 1, 1 end do !$omp end target teams distribute parallel do simd ! CHECK: %[[C2:.*]] = arith.constant 2 : i32 - ! CHECK: omp.target device(%[[C2]] : i32) + ! 
CHECK: omp.target kernel_type(spmd) device(%[[C2]] : i32) !$omp target teams distribute parallel do simd device(5_8) do i = 1, 1 end do !$omp end target teams distribute parallel do simd ! CHECK: %[[C5:.*]] = arith.constant 5 : i64 - ! CHECK: omp.target device(%[[C5]] : i64) + ! CHECK: omp.target kernel_type(spmd) device(%[[C5]] : i64) end subroutine omp_target_teams_distribute_parallel_do_simd_device diff --git a/flang/test/Lower/OpenMP/target-teams-private.f90 b/flang/test/Lower/OpenMP/target-teams-private.f90 index 65d97649b5cf3..c4306348c6d44 100644 --- a/flang/test/Lower/OpenMP/target-teams-private.f90 +++ b/flang/test/Lower/OpenMP/target-teams-private.f90 @@ -13,7 +13,7 @@ subroutine target_teams_private() !$omp end target teams end subroutine -! CHECK: omp.target {{.*}} { +! CHECK: omp.target kernel_type(generic) {{.*}} { ! CHECK: omp.teams { ! CHECK: %{{.*}} = fir.alloca !fir.array<3xi32> {bindc_name = "i", {{.*}}} ! CHECK: } diff --git a/flang/test/Lower/OpenMP/target.f90 b/flang/test/Lower/OpenMP/target.f90 index e7168f3944037..f2ebc2c0efde9 100644 --- a/flang/test/Lower/OpenMP/target.f90 +++ b/flang/test/Lower/OpenMP/target.f90 @@ -358,7 +358,7 @@ subroutine omp_target integer :: a(1024) !CHECK: %[[BOUNDS:.*]] = omp.map.bounds lower_bound({{.*}}) upper_bound({{.*}}) extent({{.*}}) stride({{.*}}) start_idx({{.*}}) !CHECK: %[[MAP:.*]] = omp.map.info var_ptr(%[[VAL_1]]#1 : !fir.ref>, !fir.array<1024xi32>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref> {name = "a"} - !CHECK: omp.target map_entries(%[[MAP]] -> %[[ARG_0:.*]] : !fir.ref>) { + !CHECK: omp.target kernel_type(generic) map_entries(%[[MAP]] -> %[[ARG_0:.*]] : !fir.ref>) { !$omp target map(tofrom: a) !CHECK: %[[VAL_7:.*]] = arith.constant 1024 : index !CHECK: %[[VAL_2:.*]] = fir.shape %[[VAL_7]] : (index) -> !fir.shape<1> @@ -391,7 +391,7 @@ subroutine omp_target_depend !CHECK: %[[UBOUND_A:.*]] = arith.subi %c1024, %c1 : index !CHECK: %[[BOUNDS_A:.*]] = omp.map.bounds 
lower_bound(%[[LBOUND_A]] : index) upper_bound(%[[UBOUND_A]] : index) extent(%[[EXTENT_A]] : index) stride(%[[STRIDE_A]] : index) start_idx(%[[STRIDE_A]] : index) !CHECK: %[[MAP_A:.*]] = omp.map.info var_ptr(%[[A]]#1 : !fir.ref>, !fir.array<1024xi32>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS_A]]) -> !fir.ref> {name = "a"} - !CHECK: omp.target depend(taskdependin -> %[[A]]#0 : !fir.ref>) map_entries(%[[MAP_A]] -> %[[BB0_ARG:.*]] : !fir.ref>) { + !CHECK: omp.target kernel_type(generic) depend(taskdependin -> %[[A]]#0 : !fir.ref>) map_entries(%[[MAP_A]] -> %[[BB0_ARG:.*]] : !fir.ref>) { !$omp target map(tofrom: a) depend(in: a) a(1) = 10 !CHECK: omp.terminator @@ -411,7 +411,7 @@ subroutine omp_target_implicit !CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_1]](%[[VAL_2]]) {uniq_name = "_QFomp_target_implicitEa"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) integer :: a(1024) !CHECK: %[[VAL_4:.*]] = omp.map.info var_ptr(%[[VAL_3]]#1 : !fir.ref>, !fir.array<1024xi32>) map_clauses(implicit, tofrom) capture(ByRef) bounds(%{{.*}}) -> !fir.ref> {name = "a"} - !CHECK: omp.target map_entries(%[[VAL_4]] -> %[[VAL_6:.*]] : !fir.ref>) { + !CHECK: omp.target kernel_type(generic) map_entries(%[[VAL_4]] -> %[[VAL_6:.*]] : !fir.ref>) { !$omp target !CHECK: %[[VAL_7:.*]] = arith.constant 1024 : index !CHECK: %[[VAL_8:.*]] = fir.shape %[[VAL_7]] : (index) -> !fir.shape<1> @@ -434,8 +434,8 @@ end subroutine omp_target_implicit subroutine omp_target_implicit_nested integer::a, b - !CHECK-NO-FPRIV: omp.target map_entries(%{{.*}} -> %[[ARG0:.*]], %{{.*}} -> %[[ARG1:.*]] : !fir.ref, !fir.ref) { - !CHECK-FPRIV: omp.target map_entries(%{{.*}} -> %[[ARG0:.*]], %{{.*}} -> %[[ARG1:.*]] : !fir.ref, !fir.ref) private(@{{.*}} %{{.*}} -> %[[ARG2:.*]] [map_idx=0], @{{.*}} %{{.*}} -> %[[ARG3:.*]] [map_idx=1] : !fir.ref, !fir.ref) { + !CHECK-NO-FPRIV: omp.target kernel_type(generic) map_entries(%{{.*}} -> %[[ARG0:.*]], %{{.*}} -> %[[ARG1:.*]] : !fir.ref, !fir.ref) { + 
!CHECK-FPRIV: omp.target kernel_type(generic) map_entries(%{{.*}} -> %[[ARG0:.*]], %{{.*}} -> %[[ARG1:.*]] : !fir.ref, !fir.ref) private(@{{.*}} %{{.*}} -> %[[ARG2:.*]] [map_idx=0], @{{.*}} %{{.*}} -> %[[ARG3:.*]] [map_idx=1] : !fir.ref, !fir.ref) { !$omp target !CHECK-NO-FPRIV: %[[VAL_8:.*]]:2 = hlfir.declare %[[ARG0]] {uniq_name = "_QFomp_target_implicit_nestedEa"} : (!fir.ref) -> (!fir.ref, !fir.ref) !CHECK-NO-FPRIV: %[[VAL_9:.*]]:2 = hlfir.declare %[[ARG1]] {uniq_name = "_QFomp_target_implicit_nestedEb"} : (!fir.ref) -> (!fir.ref, !fir.ref) @@ -483,7 +483,7 @@ subroutine omp_target_implicit_bounds(n) !CHECK: %[[VAL_14:.*]] = omp.map.bounds lower_bound(%c0{{.*}} : index) upper_bound(%[[UB]] : index) extent(%[[VAL_7]] : index) stride(%c1{{.*}} : index) start_idx(%c1{{.*}} : index) !CHECK: %[[VAL_15:.*]] = omp.map.info var_ptr(%[[VAL_10]]#1 : !fir.ref>, i32) map_clauses(implicit, tofrom) capture(ByRef) bounds(%[[VAL_14]]) -> !fir.ref> {name = "a"} !CHECK: %[[VAL_16:.*]] = omp.map.info var_ptr(%[[VAL_COPY]] : !fir.ref, i32) map_clauses(implicit) capture(ByCopy) -> !fir.ref {name = ""} - !CHECK: omp.target map_entries(%[[VAL_15]] -> %[[VAL_17:.*]], %[[VAL_16]] -> %[[VAL_18:.*]] : !fir.ref>, !fir.ref) { + !CHECK: omp.target kernel_type(generic) map_entries(%[[VAL_15]] -> %[[VAL_17:.*]], %[[VAL_16]] -> %[[VAL_18:.*]] : !fir.ref>, !fir.ref) { !$omp target !CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_18]] : !fir.ref !CHECK: %[[VAL_20:.*]] = fir.convert %[[VAL_19]] : (i32) -> i64 @@ -512,7 +512,7 @@ subroutine omp_target_thread_limit integer :: a !CHECK: %[[MAP:.*]] = omp.map.info var_ptr({{.*}}) map_clauses(tofrom) capture(ByRef) -> !fir.ref {name = "a"} !CHECK: %[[VAL_1:.*]] = arith.constant 64 : i32 - !CHECK: omp.target thread_limit(%[[VAL_1]] : i32) map_entries(%[[MAP]] -> %{{.*}} : !fir.ref) { + !CHECK: omp.target kernel_type(generic) thread_limit(%[[VAL_1]] : i32) map_entries(%[[MAP]] -> %{{.*}} : !fir.ref) { !$omp target map(tofrom: a) thread_limit(64) a = 10 !CHECK: 
omp.terminator @@ -584,7 +584,7 @@ subroutine omp_target_is_device_ptr !CHECK: %[[P_STORAGE:.*]] = omp.map.info {{.*}}{name = "p"} !CHECK: %[[P_IS:.*]] = omp.map.info {{.*}}{name = "p"} !CHECK: %[[ARR_MAP:.*]] = omp.map.info {{.*}}{name = "arr"} - !CHECK: omp.target is_device_ptr(%[[P_IS]] : + !CHECK: omp.target kernel_type(generic) is_device_ptr(%[[P_IS]] : !CHECK-SAME: has_device_addr(%[[P_STORAGE]] -> !CHECK-SAME: map_entries({{.*}}%[[ARR_MAP]] -> !$omp target is_device_ptr(p) @@ -636,7 +636,7 @@ subroutine omp_target_parallel_do !CHECK: %[[SUB:.*]] = arith.subi %[[C1024]], %[[C1]] : index !CHECK: %[[BOUNDS:.*]] = omp.map.bounds lower_bound(%[[C0]] : index) upper_bound(%[[SUB]] : index) extent(%[[C1024]] : index) stride(%[[C1]] : index) start_idx(%[[C1]] : index) !CHECK: %[[MAP:.*]] = omp.map.info var_ptr(%[[VAL_0_DECL]]#1 : !fir.ref>, !fir.array<1024xi32>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS]]) -> !fir.ref> {name = "a"} - !CHECK: omp.target map_entries(%[[MAP]] -> %[[ARG_0:.*]], %{{.*}} -> %{{.*}} : !fir.ref>, !fir.ref) { + !CHECK: omp.target kernel_type(spmd) host_eval({{.*}}) map_entries(%[[MAP]] -> %[[ARG_0:.*]], %{{.*}} -> %{{.*}} : !fir.ref>, !fir.ref) { !CHECK: %[[VAL_0_DECL:.*]]:2 = hlfir.declare %[[ARG_0]](%{{.*}}) {uniq_name = "_QFomp_target_parallel_doEa"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) !CHECK: omp.parallel !$omp target parallel do map(tofrom: a) @@ -674,10 +674,10 @@ subroutine target_unstructured integer :: j = 11 !CHECK-NO-FPRIV: %[[VAL_4:.*]] = omp.map.info var_ptr(%[[VAL_1]]#1 : !fir.ref, i32) map_clauses(implicit) capture(ByCopy) -> !fir.ref {name = "i"} !CHECK-NO-FPRIV: %[[VAL_5:.*]] = omp.map.info var_ptr(%[[VAL_3]]#1 : !fir.ref, i32) map_clauses(implicit) capture(ByCopy) -> !fir.ref {name = "j"} - !CHECK-NO-FPRIV: omp.target map_entries(%[[VAL_4]] -> %[[VAL_6:.*]], %[[VAL_5]] -> %[[VAL_7:.*]] : !fir.ref, !fir.ref) { + !CHECK-NO-FPRIV: omp.target kernel_type(generic) map_entries(%[[VAL_4]] -> 
%[[VAL_6:.*]], %[[VAL_5]] -> %[[VAL_7:.*]] : !fir.ref, !fir.ref) { !CHECK-FPRIV: %[[VAL_4:.*]] = omp.map.info var_ptr(%[[VAL_1]]#0 : !fir.ref, i32) map_clauses(to) capture(ByCopy) -> !fir.ref !CHECK-FPRIV: %[[VAL_5:.*]] = omp.map.info var_ptr(%[[VAL_3]]#0 : !fir.ref, i32) map_clauses(to) capture(ByCopy) -> !fir.ref - !CHECK-FPRIV: omp.target map_entries(%[[VAL_4]] -> %[[ARG_0:.*]], %[[VAL_5]] -> %[[ARG_1:.*]] : !fir.ref, !fir.ref) private(@{{.*}} %[[VAL_1]]#0 -> %[[ARG_2:.*]] [map_idx=0], @{{.*}} %[[VAL_3]]#0 -> %[[ARG_3:.*]] [map_idx=1] : !fir.ref, !fir.ref) { + !CHECK-FPRIV: omp.target kernel_type(generic) map_entries(%[[VAL_4]] -> %[[ARG_0:.*]], %[[VAL_5]] -> %[[ARG_1:.*]] : !fir.ref, !fir.ref) private(@{{.*}} %[[VAL_1]]#0 -> %[[ARG_2:.*]] [map_idx=0], @{{.*}} %[[VAL_3]]#0 -> %[[ARG_3:.*]] [map_idx=1] : !fir.ref, !fir.ref) { !$omp target !CHECK-FPRIV: %[[VAL_8:.*]]:2 = hlfir.declare %[[ARG_2]] {uniq_name = "_QFtarget_unstructuredEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) !CHECK-FPRIV: %[[VAL_9:.*]]:2 = hlfir.declare %[[ARG_3]] {uniq_name = "_QFtarget_unstructuredEj"} : (!fir.ref) -> (!fir.ref, !fir.ref) @@ -712,27 +712,27 @@ subroutine omp_target_device !$omp target device(dev32) !$omp end target ! CHECK: %[[DEV32:.*]] = fir.load %{{.*}} : !fir.ref - ! CHECK: omp.target device(%[[DEV32]] : i32) + ! CHECK: omp.target kernel_type(generic) device(%[[DEV32]] : i32) !$omp target device(dev64) !$omp end target ! CHECK: %[[DEV64:.*]] = fir.load %{{.*}} : !fir.ref - ! CHECK: omp.target device(%[[DEV64]] : i64) + ! CHECK: omp.target kernel_type(generic) device(%[[DEV64]] : i64) !$omp target device(dev16) !$omp end target ! CHECK: %[[DEV16:.*]] = fir.load %{{.*}} : !fir.ref - ! CHECK: omp.target device(%[[DEV16]] : i16) + ! CHECK: omp.target kernel_type(generic) device(%[[DEV16]] : i16) !$omp target device(2) !$omp end target ! CHECK: %[[C2:.*]] = arith.constant 2 : i32 - ! CHECK: omp.target device(%[[C2]] : i32) + ! 
CHECK: omp.target kernel_type(generic) device(%[[C2]] : i32) !$omp target device(5_8) !$omp end target ! CHECK: %[[C5:.*]] = arith.constant 5 : i64 - ! CHECK: omp.target device(%[[C5]] : i64) + ! CHECK: omp.target kernel_type(generic) device(%[[C5]] : i64) end subroutine omp_target_device diff --git a/flang/test/Lower/OpenMP/workdistribute-target-teams-clauses.f90 b/flang/test/Lower/OpenMP/workdistribute-target-teams-clauses.f90 index 4a08e53bc316a..1f84d02b7dacc 100644 --- a/flang/test/Lower/OpenMP/workdistribute-target-teams-clauses.f90 +++ b/flang/test/Lower/OpenMP/workdistribute-target-teams-clauses.f90 @@ -2,7 +2,7 @@ ! CHECK-LABEL: func @_QPtarget_teams_workdistribute ! CHECK: omp.target_data map_entries({{.*}}) -! CHECK: omp.target thread_limit({{.*}}) host_eval({{.*}}) map_entries({{.*}}) +! CHECK: omp.target kernel_type(spmd) thread_limit({{.*}}) host_eval({{.*}}) map_entries({{.*}}) ! CHECK: omp.teams num_teams({{.*}}) ! CHECK: omp.parallel ! CHECK: omp.distribute diff --git a/flang/test/Lower/OpenMP/workdistribute.f90 b/flang/test/Lower/OpenMP/workdistribute.f90 index 7a938b59b8094..9d48c18d6469c 100644 --- a/flang/test/Lower/OpenMP/workdistribute.f90 +++ b/flang/test/Lower/OpenMP/workdistribute.f90 @@ -3,7 +3,7 @@ ! CHECK-LABEL: func @_QPtarget_teams_workdistribute subroutine target_teams_workdistribute() integer :: aa(10), bb(10) - ! CHECK: omp.target + ! CHECK: omp.target kernel_type(generic) ! CHECK: omp.teams ! CHECK: omp.workdistribute !$omp target teams workdistribute diff --git a/flang/test/Transforms/DoConcurrent/host_eval.f90 b/flang/test/Transforms/DoConcurrent/host_eval.f90 index 4eacdd64c6e5f..8f63c31718752 100644 --- a/flang/test/Transforms/DoConcurrent/host_eval.f90 +++ b/flang/test/Transforms/DoConcurrent/host_eval.f90 @@ -18,7 +18,7 @@ program do_concurrent_host_eval end do end program do_concurrent_host_eval -! HOST: omp.target host_eval( +! HOST: omp.target kernel_type(spmd) host_eval( ! 
HOST-SAME: %{{[^[:space:]]+}} -> %[[I_LB:[^,]+]], ! HOST-SAME: %{{[^[:space:]]+}} -> %[[I_UB:[^,]+]], ! HOST-SAME: %{{[^[:space:]]+}} -> %[[I_ST:[^,]+]], @@ -30,7 +30,7 @@ end program do_concurrent_host_eval ! HOST-SAME: (%[[I_UB]], %[[J_UB]]) inclusive step ! HOST-SAME: (%[[I_ST]], %[[J_ST]]) -! DEVICE: omp.target map_entries( +! DEVICE: omp.target kernel_type(spmd) map_entries( ! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[I_LB_MAP:[^,]+]], ! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[I_UB_MAP:[^,]+]], ! DEVICE-SAME: %{{[^[:space:]]+}} -> %[[I_ST_MAP:[^,]+]], diff --git a/flang/test/Transforms/DoConcurrent/local_device.mlir b/flang/test/Transforms/DoConcurrent/local_device.mlir index e54bb1aeb414e..6da3db7f51e0b 100644 --- a/flang/test/Transforms/DoConcurrent/local_device.mlir +++ b/flang/test/Transforms/DoConcurrent/local_device.mlir @@ -30,7 +30,7 @@ func.func @_QPfoo() { // CHECK: %[[LOCAL_DECL:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "{{.*}}my_local"} // CHECK: %[[LOCAL_MAP:.*]] = omp.map.info var_ptr(%[[LOCAL_DECL]]#1 : {{.*}}) -// CHECK: omp.target host_eval({{.*}}) map_entries({{.*}}, %[[LOCAL_MAP]] -> %[[LOCAL_MAP_ARG:.*]] : {{.*}}) { +// CHECK: omp.target kernel_type(spmd) host_eval({{.*}}) map_entries({{.*}}, %[[LOCAL_MAP]] -> %[[LOCAL_MAP_ARG:.*]] : {{.*}}) { // CHECK: %[[LOCAL_DEV_DECL:.*]]:2 = hlfir.declare %[[LOCAL_MAP_ARG]] {uniq_name = "_QFfooEmy_local"} // CHECK: omp.teams { diff --git a/flang/test/Transforms/DoConcurrent/map_shape_info.f90 b/flang/test/Transforms/DoConcurrent/map_shape_info.f90 index 95bfc236888d1..0fd842e957ee2 100644 --- a/flang/test/Transforms/DoConcurrent/map_shape_info.f90 +++ b/flang/test/Transforms/DoConcurrent/map_shape_info.f90 @@ -38,7 +38,7 @@ end program do_concurrent_shape ! CHECK-SAME: map_clauses(implicit) ! CHECK-SAME: capture(ByCopy) -> !fir.ref {name = "_QFEa.extent.dim1"} -! CHECK: omp.target host_eval({{.*}}) map_entries( +! CHECK: omp.target kernel_type(spmd) host_eval({{.*}}) map_entries( ! 
CHECK-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}}, ! CHECK-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}}, ! CHECK-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}}, @@ -87,7 +87,7 @@ end subroutine do_concurrent_shape_shift ! CHECK-SAME: map_clauses(implicit) ! CHECK-SAME: capture(ByCopy) -> !fir.ref {name = "_QF{{.*}}Ea.extent.dim0"} -! CHECK: omp.target host_eval({{.*}}) map_entries( +! CHECK: omp.target kernel_type(spmd) host_eval({{.*}}) map_entries( ! CHECK-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}}, ! CHECK-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}}, ! CHECK-SAME: %{{[^[:space:]]+}} -> %{{[^,]+}}, diff --git a/flang/test/Transforms/DoConcurrent/multiple_iteration_ranges.f90 b/flang/test/Transforms/DoConcurrent/multiple_iteration_ranges.f90 index 7de38bf064bb6..a2df3b092dda3 100644 --- a/flang/test/Transforms/DoConcurrent/multiple_iteration_ranges.f90 +++ b/flang/test/Transforms/DoConcurrent/multiple_iteration_ranges.f90 @@ -40,7 +40,7 @@ program main ! COMMON: %[[UB_K:.*]] = fir.convert %[[C60]] : (i32) -> index ! COMMON: %[[STEP_K:.*]] = arith.constant 1 : index -! DEVICE: omp.target host_eval( +! DEVICE: omp.target kernel_type(spmd) host_eval( ! DEVICE-SAME: %[[LB_I]] -> %[[LB_I:[[:alnum:]]+]], ! DEVICE-SAME: %[[UB_I]] -> %[[UB_I:[[:alnum:]]+]], ! DEVICE-SAME: %[[STEP_I]] -> %[[STEP_I:[[:alnum:]]+]], diff --git a/flang/test/Transforms/DoConcurrent/non_reference_to_device.f90 b/flang/test/Transforms/DoConcurrent/non_reference_to_device.f90 index af48eb4852e91..b4eb4557f975c 100644 --- a/flang/test/Transforms/DoConcurrent/non_reference_to_device.f90 +++ b/flang/test/Transforms/DoConcurrent/non_reference_to_device.f90 @@ -28,7 +28,7 @@ end subroutine test_non_refernece ! CHECK-SAME: capture(ByCopy) -> !fir.ref {name = ""} -! CHECK: omp.target host_eval({{.*}} : index, index, index) +! CHECK: omp.target kernel_type(spmd) host_eval({{.*}} : index, index, index) ! CHECK-SAME: map_entries({{.*}}, %[[DIM_MAP]] -> %{{.*}} : ! 
CHECK-SAME: !fir.ref, !fir.ref) diff --git a/flang/test/Transforms/DoConcurrent/reduce_device.mlir b/flang/test/Transforms/DoConcurrent/reduce_device.mlir index 3e46692a15dca..82d8c6cf87fa9 100644 --- a/flang/test/Transforms/DoConcurrent/reduce_device.mlir +++ b/flang/test/Transforms/DoConcurrent/reduce_device.mlir @@ -37,7 +37,7 @@ func.func @_QPfoo() { // CHECK: %[[S_DECL:.*]]:2 = hlfir.declare %6 {uniq_name = "_QFfooEs"} // CHECK: %[[S_MAP:.*]] = omp.map.info var_ptr(%[[S_DECL]]#1 -// CHECK: omp.target host_eval({{.*}}) map_entries({{.*}}, %[[S_MAP]] -> %[[S_TARGET_ARG:.*]] : {{.*}}) { +// CHECK: omp.target kernel_type(spmd) host_eval({{.*}}) map_entries({{.*}}, %[[S_MAP]] -> %[[S_TARGET_ARG:.*]] : {{.*}}) { // CHECK: %[[S_DEV_DECL:.*]]:2 = hlfir.declare %[[S_TARGET_ARG]] // CHECK: omp.teams reduction(@[[OMP_RED]] %[[S_DEV_DECL]]#0 -> %[[RED_TEAMS_ARG:.*]] : !fir.ref) { // CHECK: omp.parallel { diff --git a/flang/test/Transforms/OpenMP/delete-unreachable-targets.mlir b/flang/test/Transforms/OpenMP/delete-unreachable-targets.mlir index 55e4bdf5d65de..6db167b51039d 100644 --- a/flang/test/Transforms/OpenMP/delete-unreachable-targets.mlir +++ b/flang/test/Transforms/OpenMP/delete-unreachable-targets.mlir @@ -12,7 +12,7 @@ func.func @test_if_false_simple() { // CHECK-NOT: omp.target // CHECK: } fir.if %false { - omp.target { + omp.target kernel_type(generic) { omp.terminator } } @@ -27,7 +27,7 @@ func.func @test_if_true_simple() { // The target should remain since the branch is reachable // CHECK: omp.target fir.if %true { - omp.target { + omp.target kernel_type(generic) { omp.terminator } } @@ -46,7 +46,7 @@ func.func @test_nested_outer_false() { // CHECK: } fir.if %false { fir.if %true { - omp.target { + omp.target kernel_type(generic) { omp.terminator } } @@ -67,7 +67,7 @@ func.func @test_nested_inner_false() { // CHECK: } fir.if %true { fir.if %false { - omp.target { + omp.target kernel_type(generic) { omp.terminator } } @@ -84,7 +84,7 @@ func.func 
@test_nested_both_true() { // CHECK: omp.target fir.if %true1 { fir.if %true2 { - omp.target { + omp.target kernel_type(generic) { omp.terminator } } @@ -101,14 +101,14 @@ func.func @test_mixed_targets() { // Live target - should remain (expect 2 targets total in output) // CHECK: omp.target - omp.target { + omp.target kernel_type(generic) { omp.terminator } // Another live target in if (true) - should remain // CHECK: omp.target fir.if %true { - omp.target { + omp.target kernel_type(generic) { omp.terminator } } @@ -116,7 +116,7 @@ func.func @test_mixed_targets() { // Dead target - will be removed // CHECK-NOT: omp.target fir.if %false { - omp.target { + omp.target kernel_type(generic) { omp.terminator } } @@ -133,15 +133,15 @@ func.func @test_multiple_dead_targets() { // All targets inside dead branch should be removed // CHECK-NOT: omp.target fir.if %false { - omp.target { + omp.target kernel_type(generic) { omp.terminator } - omp.target { + omp.target kernel_type(generic) { omp.terminator } - omp.target { + omp.target kernel_type(generic) { omp.terminator } } @@ -157,7 +157,7 @@ func.func @test_if_else_false() { // CHECK: fir.if %false { fir.if %false { // Then branch is unreachable, target should be deleted - omp.target { + omp.target kernel_type(generic) { omp.terminator } } else { @@ -165,7 +165,7 @@ func.func @test_if_else_false() { // CHECK: } else { // Else branch is reachable, target should remain // CHECK: omp.target - omp.target { + omp.target kernel_type(generic) { omp.terminator } } @@ -180,7 +180,7 @@ func.func @test_runtime_condition(%arg0: i1) { // CHECK: fir.if %arg0 { fir.if %arg0 { // CHECK: omp.target - omp.target { + omp.target kernel_type(generic) { omp.terminator } } @@ -202,7 +202,7 @@ func.func @test_nested_in_unreachable_block() { // CHECK-NOT: omp.target // CHECK: cf.br ^bb2 fir.if %true { - omp.target { + omp.target kernel_type(generic) { omp.terminator } } @@ -210,7 +210,7 @@ func.func @test_nested_in_unreachable_block() { ^bb2: // 
CHECK: ^bb2: // CHECK-NEXT: omp.target - omp.target { + omp.target kernel_type(generic) { omp.terminator } return @@ -226,7 +226,7 @@ func.func @test_unreachable_block_after_branch() { // CHECK: ^bb1: // CHECK-NOT: omp.target // CHECK: cf.br ^bb2 - omp.target { + omp.target kernel_type(generic) { omp.terminator } cf.br ^bb2 @@ -234,7 +234,7 @@ func.func @test_unreachable_block_after_branch() { // This block is reachable // CHECK: ^bb2: // CHECK-NEXT: omp.target - omp.target { + omp.target kernel_type(generic) { omp.terminator } return @@ -250,7 +250,7 @@ func.func @test_multiple_unreachable_blocks() { // CHECK: ^bb1: // CHECK-NOT: omp.target // CHECK: cf.br ^bb2 - omp.target { + omp.target kernel_type(generic) { omp.terminator } cf.br ^bb2 @@ -259,7 +259,7 @@ func.func @test_multiple_unreachable_blocks() { // CHECK: ^bb2: // CHECK-NOT: omp.target // CHECK: return - omp.target { + omp.target kernel_type(generic) { omp.terminator } return @@ -267,7 +267,7 @@ func.func @test_multiple_unreachable_blocks() { // Reachable from entry // CHECK: ^bb3: // CHECK-NEXT: omp.target - omp.target { + omp.target kernel_type(generic) { omp.terminator } return @@ -281,14 +281,14 @@ func.func @test_both_branches_reachable(%arg0: i1) { ^bb1: // CHECK: ^bb1: // CHECK-NEXT: omp.target - omp.target { + omp.target kernel_type(generic) { omp.terminator } cf.br ^bb3 ^bb2: // CHECK: ^bb2: // CHECK-NEXT: omp.target - omp.target { + omp.target kernel_type(generic) { omp.terminator } cf.br ^bb3 @@ -307,7 +307,7 @@ func.func @test_disconnected_block() { // CHECK: ^bb1: // CHECK-NOT: omp.target // CHECK: cf.br ^bb2 - omp.target { + omp.target kernel_type(generic) { omp.terminator } cf.br ^bb2 @@ -315,7 +315,7 @@ func.func @test_disconnected_block() { // Reachable from entry // CHECK: ^bb2: // CHECK-NEXT: omp.target - omp.target { + omp.target kernel_type(generic) { omp.terminator } return diff --git a/flang/test/Transforms/OpenMP/function-filtering-host-ops.mlir 
b/flang/test/Transforms/OpenMP/function-filtering-host-ops.mlir index 9be258dc800d9..78aa5b8095964 100644 --- a/flang/test/Transforms/OpenMP/function-filtering-host-ops.mlir +++ b/flang/test/Transforms/OpenMP/function-filtering-host-ops.mlir @@ -31,8 +31,8 @@ module attributes {omp.is_target_device = true} { %m2 = omp.map.info var_ptr(%2#1 : !fir.ref, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref %m3 = omp.map.info var_ptr(%alloc : !fir.ref, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref - // CHECK-NEXT: omp.target has_device_addr(%[[MAP2]] -> {{.*}} : {{.*}}) map_entries(%[[MAP0]] -> {{.*}}, %[[MAP1]] -> {{.*}}, %[[MAP3]] -> {{.*}} : {{.*}}) - omp.target has_device_addr(%m2 -> %arg0 : !fir.ref) map_entries(%m0 -> %arg1, %m1 -> %arg2, %m3 -> %arg3 : !fir.ref, !fir.ref, !fir.ref) { + // CHECK-NEXT: omp.target kernel_type(generic) has_device_addr(%[[MAP2]] -> {{.*}} : {{.*}}) map_entries(%[[MAP0]] -> {{.*}}, %[[MAP1]] -> {{.*}}, %[[MAP3]] -> {{.*}} : {{.*}}) + omp.target kernel_type(generic) has_device_addr(%m2 -> %arg0 : !fir.ref) map_entries(%m0 -> %arg1, %m1 -> %arg2, %m3 -> %arg3 : !fir.ref, !fir.ref, !fir.ref) { // CHECK-NEXT: func.call func.call @foo() : () -> () omp.terminator @@ -90,7 +90,7 @@ module attributes {omp.is_target_device = true} { // CHECK-NEXT: %[[VAR_PTR_PTR:.*]] = fir.box_offset %[[ALLOCATABLE_DECL]]#1 base_addr : ([[ALLOCATABLE_TYPE]]) -> [[VAR_PTR_PTR_TYPE:.*]] // CHECK-NEXT: %[[MAP_ALLOCATABLE:.*]] = omp.map.info var_ptr(%[[ALLOCATABLE_DECL]]#1 : [[ALLOCATABLE_TYPE]], f32) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%[[VAR_PTR_PTR]] : [[VAR_PTR_PTR_TYPE]]) -> [[VAR_PTR_PTR_TYPE]] // CHECK-NEXT: %[[MAP_ARRAY:.*]] = omp.map.info var_ptr(%[[ARRAY_DECL]]#1 : [[ARRAY_TYPE]], !fir.array<9xi32>) map_clauses(tofrom) capture(ByRef) -> [[ARRAY_TYPE]] - // CHECK-NEXT: omp.target map_entries(%[[MAP_ALLOCATABLE]] -> %{{.*}}, %[[MAP_ARRAY]] -> %{{.*}} : [[VAR_PTR_PTR_TYPE]], [[ARRAY_TYPE]]) + // CHECK-NEXT: omp.target kernel_type(generic) 
map_entries(%[[MAP_ALLOCATABLE]] -> %{{.*}}, %[[MAP_ARRAY]] -> %{{.*}} : [[VAR_PTR_PTR_TYPE]], [[ARRAY_TYPE]]) %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index %c8 = arith.constant 8 : index @@ -106,7 +106,7 @@ module attributes {omp.is_target_device = true} { %5 = omp.map.bounds lower_bound(%c0 : index) upper_bound(%c8 : index) extent(%c9 : index) stride(%c1 : index) start_idx(%c1 : index) %6 = omp.map.info var_ptr(%4#1 : !fir.ref>, !fir.array<9xi32>) map_clauses(tofrom) capture(ByRef) bounds(%5) -> !fir.ref> - omp.target map_entries(%m0 -> %arg0, %6 -> %arg1 : !fir.llvm_ptr>>, !fir.ref>) { + omp.target kernel_type(generic) map_entries(%m0 -> %arg0, %6 -> %arg1 : !fir.llvm_ptr>>, !fir.ref>) { omp.terminator } return @@ -122,8 +122,8 @@ module attributes {omp.is_target_device = true} { %3:2 = hlfir.declare %x typeparams %c1 dummy_scope %0 {uniq_name = "x"} : (!fir.ref>, index, !fir.dscope) -> (!fir.ref>, !fir.ref>) // CHECK-NEXT: %[[MAP:.*]] = omp.map.info var_ptr(%[[X_DECL]]#1 : [[X_TYPE]], !fir.char<1>) map_clauses(tofrom) capture(ByRef) -> [[X_TYPE]] %map = omp.map.info var_ptr(%3#1 : !fir.ref>, !fir.char<1>) map_clauses(tofrom) capture(ByRef) -> !fir.ref> - // CHECK-NEXT: omp.target map_entries(%[[MAP]] -> %{{.*}}) - omp.target map_entries(%map -> %arg0 : !fir.ref>) { + // CHECK-NEXT: omp.target kernel_type(generic) map_entries(%[[MAP]] -> %{{.*}}) + omp.target kernel_type(generic) map_entries(%map -> %arg0 : !fir.ref>) { omp.terminator } return @@ -145,8 +145,8 @@ module attributes {omp.is_target_device = true} { %5 = omp.map.info var_ptr(%0 : !fir.ref>>, !fir.array<*:f32>) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%4 : !fir.llvm_ptr>>) -> !fir.llvm_ptr>> // CHECK-NEXT: %[[MAP1:.*]] = omp.map.info var_ptr(%[[ALLOCA]] : !fir.ref<[[X_TYPE]]>, !fir.box>) {{.*}} members(%[[MAP0]] : [0] : [[VAR_PTR_PTR_TYPE]]) -> !fir.ref> %6 = omp.map.info var_ptr(%0 : !fir.ref>>, !fir.box>) map_clauses(to) capture(ByRef) members(%5 : [0] : !fir.llvm_ptr>>) -> 
!fir.ref> - // CHECK-NEXT: omp.target map_entries(%[[MAP1]] -> %{{.*}}, %[[MAP0]] -> {{.*}}) - omp.target map_entries(%6 -> %arg1, %5 -> %arg2 : !fir.ref>, !fir.llvm_ptr>>) { + // CHECK-NEXT: omp.target kernel_type(generic) map_entries(%[[MAP1]] -> %{{.*}}, %[[MAP0]] -> {{.*}}) + omp.target kernel_type(generic) map_entries(%6 -> %arg1, %5 -> %arg2 : !fir.ref>, !fir.llvm_ptr>>) { omp.terminator } return @@ -198,8 +198,8 @@ module attributes {omp.is_target_device = true} { %17 = omp.map.info var_ptr(%0 : !fir.ref>>>, i32) map_clauses(implicit, to) capture(ByRef) var_ptr_ptr(%16 : !fir.llvm_ptr>>) bounds(%7) -> !fir.llvm_ptr>> %18 = omp.map.info var_ptr(%0 : !fir.ref>>>, !fir.box>>) map_clauses(implicit, to) capture(ByRef) members(%17 : [0] : !fir.llvm_ptr>>) -> !fir.ref>>> - // CHECK-NEXT: omp.target map_entries(%[[MAP0]] -> %{{.*}}, %[[MAP2]] -> %{{.*}}, %[[MAP1]] -> {{.*}} : [[Y_TYPE]], [[X_TYPE]], [[VAR_PTR_PTR_TYPE]]) - omp.target map_entries(%15 -> %arg1, %18 -> %arg2, %17 -> %arg3 : !fir.ptr>, !fir.ref>>>, !fir.llvm_ptr>>) { + // CHECK-NEXT: omp.target kernel_type(generic) map_entries(%[[MAP0]] -> %{{.*}}, %[[MAP2]] -> %{{.*}}, %[[MAP1]] -> {{.*}} : [[Y_TYPE]], [[X_TYPE]], [[VAR_PTR_PTR_TYPE]]) + omp.target kernel_type(generic) map_entries(%15 -> %arg1, %18 -> %arg2, %17 -> %arg3 : !fir.ptr>, !fir.ref>>>, !fir.llvm_ptr>>) { omp.terminator } return @@ -233,8 +233,8 @@ module attributes {omp.is_target_device = true} { // CHECK-NOT: func.call func.call @foo() : () -> () - // CHECK-NEXT: omp.target map_entries(%[[MAPPED_MAP]] -> %{{.*}}, %[[USEDEVADDR_MAP]] -> %{{.*}}, %[[USEDEVPTR_MAP]] -> %{{.*}} : {{.*}}) - omp.target map_entries(%m3 -> %arg2, %m4 -> %arg3, %m5 -> %arg4 : !fir.ref, !fir.ref, !fir.ref>) { + // CHECK-NEXT: omp.target kernel_type(generic) map_entries(%[[MAPPED_MAP]] -> %{{.*}}, %[[USEDEVADDR_MAP]] -> %{{.*}}, %[[USEDEVPTR_MAP]] -> %{{.*}} : {{.*}}) + omp.target kernel_type(generic) map_entries(%m3 -> %arg2, %m4 -> %arg3, %m5 -> %arg4 : !fir.ref, 
!fir.ref, !fir.ref>) { omp.terminator } @@ -270,8 +270,8 @@ module attributes {omp.is_target_device = true} { %72 = omp.map.info var_ptr(%23#1 : !fir.ref>>>, f32) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%71 : !fir.llvm_ptr>>) bounds(%70) -> !fir.llvm_ptr>> // CHECK-NEXT: %[[MAP1:.*]] = omp.map.info var_ptr(%[[X_DECL]]#1 : [[X_TYPE]], !fir.box>>) {{.*}} members(%[[MAP0]] : [0] : [[VAR_PTR_PTR_TYPE]]) -> [[X_TYPE]] %73 = omp.map.info var_ptr(%23#1 : !fir.ref>>>, !fir.box>>) map_clauses(to) capture(ByRef) members(%72 : [0] : !fir.llvm_ptr>>) -> !fir.ref>>> - // CHECK-NEXT: omp.target map_entries(%[[MAP1]] -> {{.*}}, %[[MAP0]] -> %{{.*}} : [[X_TYPE]], [[VAR_PTR_PTR_TYPE]]) - omp.target map_entries(%73 -> %arg0, %72 -> %arg1 : !fir.ref>>>, !fir.llvm_ptr>>) { + // CHECK-NEXT: omp.target kernel_type(generic) map_entries(%[[MAP1]] -> {{.*}}, %[[MAP0]] -> %{{.*}} : [[X_TYPE]], [[VAR_PTR_PTR_TYPE]]) + omp.target kernel_type(generic) map_entries(%73 -> %arg0, %72 -> %arg1 : !fir.ref>>>, !fir.llvm_ptr>>) { omp.terminator } return @@ -291,8 +291,8 @@ module attributes {omp.is_target_device = true} { ^bb1: // pred: ^bb0 fir.call @foo() : () -> () %m0 = omp.map.info var_ptr(%x_decl#1 : !fir.ref, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref - // CHECK-NEXT: omp.target map_entries(%[[MAP0]] -> {{.*}} : [[X_TYPE]]) - omp.target map_entries(%m0 -> %arg2 : !fir.ref) { + // CHECK-NEXT: omp.target kernel_type(generic) map_entries(%[[MAP0]] -> {{.*}} : [[X_TYPE]]) + omp.target kernel_type(generic) map_entries(%m0 -> %arg2 : !fir.ref) { omp.terminator } fir.call @foo() : () -> () @@ -311,8 +311,8 @@ module attributes {omp.is_target_device = true} { ^bb1: // pred: ^bb0 fir.call @foo() : () -> () %m2 = omp.map.info var_ptr(%x_decl#1 : !fir.ref, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref - // CHECK: omp.target map_entries(%[[MAP1]] -> {{.*}} : [[X_TYPE]]) - omp.target map_entries(%m2 -> %arg2 : !fir.ref) { + // CHECK: omp.target kernel_type(generic) 
map_entries(%[[MAP1]] -> {{.*}} : [[X_TYPE]]) + omp.target kernel_type(generic) map_entries(%m2 -> %arg2 : !fir.ref) { omp.terminator } // CHECK-NOT: fir.call @@ -344,8 +344,8 @@ module attributes {omp.is_target_device = true} { omp.parallel private(@privatizer %x_decl#0 -> %arg0 : !fir.ref) { %0:2 = hlfir.declare %arg0 {uniq_name = "x"} : (!fir.ref) -> (!fir.ref, !fir.ref) %m0 = omp.map.info var_ptr(%0#1 : !fir.ref, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref - // CHECK-NEXT: omp.target map_entries(%[[MAP0]] -> {{.*}} : [[X_TYPE]]) - omp.target map_entries(%m0 -> %arg2 : !fir.ref) { + // CHECK-NEXT: omp.target kernel_type(generic) map_entries(%[[MAP0]] -> {{.*}} : [[X_TYPE]]) + omp.target kernel_type(generic) map_entries(%m0 -> %arg2 : !fir.ref) { omp.terminator } omp.terminator @@ -362,8 +362,8 @@ module attributes {omp.is_target_device = true} { omp.parallel private(@privatizer %1#0 -> %arg1 : !fir.ref) { %2:2 = hlfir.declare %arg1 {uniq_name = "x"} : (!fir.ref) -> (!fir.ref, !fir.ref) %m2 = omp.map.info var_ptr(%2#1 : !fir.ref, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref - // CHECK: omp.target map_entries(%[[MAP1]] -> {{.*}} : [[X_TYPE]]) - omp.target map_entries(%m2 -> %arg2 : !fir.ref) { + // CHECK: omp.target kernel_type(generic) map_entries(%[[MAP1]] -> {{.*}} : [[X_TYPE]]) + omp.target kernel_type(generic) map_entries(%m2 -> %arg2 : !fir.ref) { omp.terminator } omp.terminator @@ -396,8 +396,8 @@ module attributes {omp.is_target_device = true} { %1:2 = hlfir.declare %global {uniq_name = "global_scalar"} : (!fir.ref) -> (!fir.ref, !fir.ref) %m1 = omp.map.info var_ptr(%0#1 : !fir.ref, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref %m2 = omp.map.info var_ptr(%1#1 : !fir.ref, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref - // CHECK-NEXT: omp.target map_entries(%[[MAP0]] -> %{{.*}}, %[[MAP1]] -> {{.*}} : !fir.ref, !fir.ref) - omp.target map_entries(%m1 -> %arg0, %m2 -> %arg1 : !fir.ref, !fir.ref) { + // CHECK-NEXT: omp.target 
kernel_type(generic) map_entries(%[[MAP0]] -> %{{.*}}, %[[MAP1]] -> {{.*}} : !fir.ref, !fir.ref) + omp.target kernel_type(generic) map_entries(%m1 -> %arg0, %m2 -> %arg1 : !fir.ref, !fir.ref) { omp.terminator } omp.terminator @@ -408,17 +408,17 @@ module attributes {omp.is_target_device = true} { %2 = fir.load %global : !fir.ref %3:2 = hlfir.declare %global {uniq_name = "global_scalar"} : (!fir.ref) -> (!fir.ref, !fir.ref) %m3 = omp.map.info var_ptr(%3#1 : !fir.ref, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref - // CHECK: omp.target thread_limit(%[[THREAD_LIMIT]] : i32) map_entries(%[[MAP2]] -> %{{.*}} : !fir.ref) - omp.target thread_limit(%2 : i32) map_entries(%m3 -> %arg0 : !fir.ref) { + // CHECK: omp.target kernel_type(generic) thread_limit(%[[THREAD_LIMIT]] : i32) map_entries(%[[MAP2]] -> %{{.*}} : !fir.ref) + omp.target kernel_type(generic) thread_limit(%2 : i32) map_entries(%m3 -> %arg0 : !fir.ref) { omp.terminator } - // CHECK: omp.target thread_limit(%[[CONST]] : i32) + // CHECK: omp.target kernel_type(generic) thread_limit(%[[CONST]] : i32) %c1 = arith.constant 1 : i32 - omp.target thread_limit(%c1 : i32) { + omp.target kernel_type(generic) thread_limit(%c1 : i32) { omp.terminator } - // CHECK: omp.target thread_limit(%[[CONST]] : i32) - omp.target thread_limit(%c1 : i32) { + // CHECK: omp.target kernel_type(generic) thread_limit(%[[CONST]] : i32) + omp.target kernel_type(generic) thread_limit(%c1 : i32) { omp.terminator } return @@ -432,8 +432,9 @@ module attributes {omp.is_target_device = true} { omp.target_data device(%int : i32) if(%bool) map_entries(%m0 : !fir.ref) { omp.terminator } - // CHECK-NEXT: omp.target allocate({{[^)]*}}) thread_limit({{[^)]*}}) in_reduction({{[^)]*}}) private({{[^)]*}}) { - omp.target allocate(%ref : !fir.ref -> %ref : !fir.ref) + // CHECK-NEXT: omp.target kernel_type(generic) allocate({{[^)]*}}) thread_limit({{[^)]*}}) in_reduction({{[^)]*}}) private({{[^)]*}}) { + omp.target kernel_type(generic) + allocate(%ref : 
!fir.ref -> %ref : !fir.ref) depend(taskdependin -> %ref : !fir.ref) device(%int : i32) if(%bool) thread_limit(%int : i32) in_reduction(@reduction %ref -> %arg0 : !fir.ref) @@ -460,12 +461,12 @@ module attributes {omp.is_target_device = true} { // CHECK-NEXT: %[[PLACEHOLDER:.*]] = fir.alloca !fir.char<1> // CHECK-NEXT: %[[ONE:.*]] = arith.constant 1 : i32 // CHECK-NEXT: %[[EMBOXCHAR:.*]] = fir.emboxchar %[[PLACEHOLDER]], %[[ONE]] : (!fir.ref>, i32) -> !fir.boxchar<1> - // CHECK-NEXT: omp.target private(@boxchar_firstprivatizer %[[EMBOXCHAR]] -> %{{.*}} [map_idx=0] : !fir.boxchar<1>) + // CHECK-NEXT: omp.target kernel_type(generic) private(@boxchar_firstprivatizer %[[EMBOXCHAR]] -> %{{.*}} [map_idx=0] : !fir.boxchar<1>) %0 = fir.alloca !fir.boxchar<1> %1 = fir.dummy_scope : !fir.dscope %2:2 = fir.unboxchar %arg : (!fir.boxchar<1>) -> (!fir.ref>, index) %3:2 = hlfir.declare %2#0 typeparams %2#1 dummy_scope %1 {uniq_name = "arg"} : (!fir.ref>, index, !fir.dscope) -> (!fir.boxchar<1>, !fir.ref>) - omp.target private(@boxchar_firstprivatizer %3#0 -> %arg3 [map_idx=0] : !fir.boxchar<1>) { + omp.target kernel_type(generic) private(@boxchar_firstprivatizer %3#0 -> %arg3 [map_idx=0] : !fir.boxchar<1>) { omp.terminator } return @@ -484,8 +485,8 @@ module attributes {omp.is_target_device = true} { %4:2 = hlfir.declare %3 storage (%0[0]) {uniq_name = "a"} : (!fir.ref, !fir.ref>) -> (!fir.ref, !fir.ref) // CHECK-NEXT: %[[MAP:.*]] = omp.map.info var_ptr(%[[DECL]]#1 : !fir.ref, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref %map = omp.map.info var_ptr(%4#1 : !fir.ref, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref - // CHECK-NEXT: omp.target map_entries(%[[MAP]] -> %{{.*}} : !fir.ref) - omp.target map_entries(%map -> %arg0 : !fir.ref) { + // CHECK-NEXT: omp.target kernel_type(generic) map_entries(%[[MAP]] -> %{{.*}} : !fir.ref) + omp.target kernel_type(generic) map_entries(%map -> %arg0 : !fir.ref) { omp.terminator } return diff --git 
a/flang/test/Transforms/OpenMP/function-filtering.mlir b/flang/test/Transforms/OpenMP/function-filtering.mlir index 46291e9321f76..343769b805494 100644 --- a/flang/test/Transforms/OpenMP/function-filtering.mlir +++ b/flang/test/Transforms/OpenMP/function-filtering.mlir @@ -46,7 +46,7 @@ module attributes {omp.is_target_device = true} { omp.declare_target = #omp.declaretarget } { - omp.target {} + omp.target kernel_type(generic) {} func.return } func.func @host_target() -> () @@ -54,11 +54,11 @@ module attributes {omp.is_target_device = true} { omp.declare_target = #omp.declaretarget } { - omp.target {} + omp.target kernel_type(generic) {} func.return } func.func @none_target() -> i32 { - omp.target {} + omp.target kernel_type(generic) {} %0 = arith.constant 25 : i32 func.return %0 : i32 } @@ -67,7 +67,7 @@ module attributes {omp.is_target_device = true} { omp.declare_target = #omp.declaretarget } { - omp.target {} + omp.target kernel_type(generic) {} %0 = call @none_target() : () -> i32 func.return %0 : i32 } @@ -119,7 +119,7 @@ module attributes {omp.is_target_device = false} { omp.declare_target = #omp.declaretarget } { - omp.target {} + omp.target kernel_type(generic) {} func.return } func.func @host_target() -> () @@ -127,11 +127,11 @@ module attributes {omp.is_target_device = false} { omp.declare_target = #omp.declaretarget } { - omp.target {} + omp.target kernel_type(generic) {} func.return } func.func @none_target() -> () { - omp.target {} + omp.target kernel_type(generic) {} func.return } } diff --git a/flang/test/Transforms/OpenMP/lower-workdistribute-fission-host.mlir b/flang/test/Transforms/OpenMP/lower-workdistribute-fission-host.mlir index aef72e4fd001e..936a487d27249 100644 --- a/flang/test/Transforms/OpenMP/lower-workdistribute-fission-host.mlir +++ b/flang/test/Transforms/OpenMP/lower-workdistribute-fission-host.mlir @@ -41,7 +41,7 @@ // CHECK: fir.store %[[VAL_25]] to %[[VAL_14]] : !fir.ref // CHECK: fir.store %[[VAL_26]] to %[[VAL_17]] : !fir.ref 
// CHECK: fir.store %[[VAL_30]] to %[[VAL_20]] : !fir.ref> -// CHECK: omp.target host_eval(%[[VAL_24]] -> %[[VAL_31:.*]], %[[VAL_25]] -> %[[VAL_32:.*]], %[[VAL_26]] -> %[[VAL_33:.*]] : index, index, index) map_entries(%[[VAL_7]] -> %[[VAL_34:.*]], %[[VAL_8]] -> %[[VAL_35:.*]], %[[VAL_9]] -> %[[VAL_36:.*]], %[[VAL_10]] -> %[[VAL_37:.*]], %[[VAL_13]] -> %[[VAL_38:.*]], %[[VAL_16]] -> %[[VAL_39:.*]], %[[VAL_19]] -> %[[VAL_40:.*]], %[[VAL_22]] -> %[[VAL_41:.*]] : !fir.ref, !fir.ref, !fir.ref, !fir.ref, !fir.ref, !fir.ref, !fir.ref, !fir.ref>) { +// CHECK: omp.target kernel_type(spmd) host_eval(%[[VAL_24]] -> %[[VAL_31:.*]], %[[VAL_25]] -> %[[VAL_32:.*]], %[[VAL_26]] -> %[[VAL_33:.*]] : index, index, index) map_entries(%[[VAL_7]] -> %[[VAL_34:.*]], %[[VAL_8]] -> %[[VAL_35:.*]], %[[VAL_9]] -> %[[VAL_36:.*]], %[[VAL_10]] -> %[[VAL_37:.*]], %[[VAL_13]] -> %[[VAL_38:.*]], %[[VAL_16]] -> %[[VAL_39:.*]], %[[VAL_19]] -> %[[VAL_40:.*]], %[[VAL_22]] -> %[[VAL_41:.*]] : !fir.ref, !fir.ref, !fir.ref, !fir.ref, !fir.ref, !fir.ref, !fir.ref, !fir.ref>) { // CHECK: %[[VAL_42:.*]] = fir.load %[[VAL_38]] : !fir.ref // CHECK: %[[VAL_43:.*]] = fir.load %[[VAL_39]] : !fir.ref // CHECK: %[[VAL_44:.*]] = fir.load %[[VAL_40]] : !fir.ref @@ -91,7 +91,7 @@ func.func @x(%lb : index, %ub : index, %step : index, %addr : !fir.ref) { %step_map = omp.map.info var_ptr(%step_ref : !fir.ref, index) map_clauses(to) capture(ByRef) -> !fir.ref {name = "step"} %addr_map = omp.map.info var_ptr(%addr : !fir.ref, index) map_clauses(tofrom) capture(ByRef) -> !fir.ref {name = "addr"} - omp.target map_entries(%lb_map -> %ARG0, %ub_map -> %ARG1, %step_map -> %ARG2, %addr_map -> %ARG3 : !fir.ref, !fir.ref, !fir.ref, !fir.ref) { + omp.target kernel_type(generic) map_entries(%lb_map -> %ARG0, %ub_map -> %ARG1, %step_map -> %ARG2, %addr_map -> %ARG3 : !fir.ref, !fir.ref, !fir.ref, !fir.ref) { %lb_val = fir.load %ARG0 : !fir.ref %ub_val = fir.load %ARG1 : !fir.ref %step_val = fir.load %ARG2 : !fir.ref diff --git 
a/flang/test/Transforms/OpenMP/lower-workdistribute-fission-target.mlir b/flang/test/Transforms/OpenMP/lower-workdistribute-fission-target.mlir index 25f0350ab98b2..832fec201bca3 100644 --- a/flang/test/Transforms/OpenMP/lower-workdistribute-fission-target.mlir +++ b/flang/test/Transforms/OpenMP/lower-workdistribute-fission-target.mlir @@ -41,7 +41,7 @@ // CHECK: fir.store %[[VAL_25]] to %[[VAL_14]] : !fir.ref // CHECK: fir.store %[[VAL_26]] to %[[VAL_17]] : !fir.ref // CHECK: fir.store %[[VAL_30]] to %[[VAL_20]] : !fir.ref> -// CHECK: omp.target map_entries(%[[VAL_7]] -> %[[VAL_31:.*]], %[[VAL_8]] -> %[[VAL_32:.*]], %[[VAL_9]] -> %[[VAL_33:.*]], %[[VAL_10]] -> %[[VAL_34:.*]], %[[VAL_13]] -> %[[VAL_35:.*]], %[[VAL_16]] -> %[[VAL_36:.*]], %[[VAL_19]] -> %[[VAL_37:.*]], %[[VAL_22]] -> %[[VAL_38:.*]] : !fir.ref, !fir.ref, !fir.ref, !fir.ref, !fir.ref, !fir.ref, !fir.ref, !fir.ref>) { +// CHECK: omp.target kernel_type(spmd) map_entries(%[[VAL_7]] -> %[[VAL_31:.*]], %[[VAL_8]] -> %[[VAL_32:.*]], %[[VAL_9]] -> %[[VAL_33:.*]], %[[VAL_10]] -> %[[VAL_34:.*]], %[[VAL_13]] -> %[[VAL_35:.*]], %[[VAL_16]] -> %[[VAL_36:.*]], %[[VAL_19]] -> %[[VAL_37:.*]], %[[VAL_22]] -> %[[VAL_38:.*]] : !fir.ref, !fir.ref, !fir.ref, !fir.ref, !fir.ref, !fir.ref, !fir.ref, !fir.ref>) { // CHECK: %[[VAL_39:.*]] = fir.load %[[VAL_35]] : !fir.ref // CHECK: %[[VAL_40:.*]] = fir.load %[[VAL_36]] : !fir.ref // CHECK: %[[VAL_41:.*]] = fir.load %[[VAL_37]] : !fir.ref @@ -92,7 +92,7 @@ func.func @x(%lb : index, %ub : index, %step : index, %addr : !fir.ref) { %step_map = omp.map.info var_ptr(%step_ref : !fir.ref, index) map_clauses(to) capture(ByRef) -> !fir.ref {name = "step"} %addr_map = omp.map.info var_ptr(%addr : !fir.ref, index) map_clauses(tofrom) capture(ByRef) -> !fir.ref {name = "addr"} - omp.target map_entries(%lb_map -> %ARG0, %ub_map -> %ARG1, %step_map -> %ARG2, %addr_map -> %ARG3 : !fir.ref, !fir.ref, !fir.ref, !fir.ref) { + omp.target kernel_type(generic) map_entries(%lb_map -> %ARG0, 
%ub_map -> %ARG1, %step_map -> %ARG2, %addr_map -> %ARG3 : !fir.ref, !fir.ref, !fir.ref, !fir.ref) { %lb_val = fir.load %ARG0 : !fir.ref %ub_val = fir.load %ARG1 : !fir.ref %step_val = fir.load %ARG2 : !fir.ref diff --git a/flang/test/Transforms/OpenMP/lower-workdistribute-runtime-assign-scalar.mlir b/flang/test/Transforms/OpenMP/lower-workdistribute-runtime-assign-scalar.mlir index 03d5d71df0a82..04e95c27e3c5c 100644 --- a/flang/test/Transforms/OpenMP/lower-workdistribute-runtime-assign-scalar.mlir +++ b/flang/test/Transforms/OpenMP/lower-workdistribute-runtime-assign-scalar.mlir @@ -10,7 +10,7 @@ // CHECK-LABEL: func.func @x( -// CHECK: omp.target {{.*}} { +// CHECK: omp.target kernel_type(spmd) {{.*}} { // CHECK: omp.teams { // CHECK: omp.parallel { // CHECK: omp.distribute { @@ -61,7 +61,7 @@ func.func @x(%arr : !fir.ref>) { %198 = omp.map.info var_ptr(%arr : !fir.ref>, f32) map_clauses(implicit, tofrom) capture(ByRef) bounds(%195, %197) -> !fir.ref> {name = "y"} %199 = omp.map.info var_ptr(%1 : !fir.ref, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !fir.ref {name = ""} %200 = omp.map.info var_ptr(%0 : !fir.ref, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !fir.ref {name = ""} - omp.target map_entries(%198 -> %arg5, %199 -> %arg6, %200 -> %arg7 : !fir.ref>, !fir.ref, !fir.ref) { + omp.target kernel_type(generic) map_entries(%198 -> %arg5, %199 -> %arg6, %200 -> %arg7 : !fir.ref>, !fir.ref, !fir.ref) { %c0_0 = arith.constant 0 : index %201 = fir.load %arg7 : !fir.ref %202 = fir.load %arg6 : !fir.ref diff --git a/flang/test/Transforms/OpenMP/simd-only.mlir b/flang/test/Transforms/OpenMP/simd-only.mlir index a550d5660d224..b8b26487e892d 100644 --- a/flang/test/Transforms/OpenMP/simd-only.mlir +++ b/flang/test/Transforms/OpenMP/simd-only.mlir @@ -84,7 +84,7 @@ func.func @target_map(%arg5: i32, %arg6: !fir.ref) { // CHECK-NOT: omp.map.info %3 = omp.map.info var_ptr(%arg6 : !fir.ref, i32) 
map_clauses(implicit) capture(ByCopy) -> !fir.ref // CHECK-NOT: omp.target - omp.target map_entries(%3 -> %arg0 : !fir.ref) { + omp.target kernel_type(generic) map_entries(%3 -> %arg0 : !fir.ref) { // CHECK: arith.constant %c1_i32 = arith.constant 1 : i32 // CHECK: fir.store %c1_i32 to %[[ARG_1]] @@ -183,7 +183,7 @@ func.func @map_info(%funcArg0: i32, %funcArg1: !fir.ref) { // CHECK-NOT: omp.map.info %13 = omp.map.info var_ptr(%funcArg1 : !fir.ref, i32) map_clauses(to) capture(ByRef) bounds(%1) -> !fir.ref // CHECK-NOT: omp.target - omp.target map_entries(%13 -> %arg3 : !fir.ref) { + omp.target kernel_type(generic) map_entries(%13 -> %arg3 : !fir.ref) { %c1_i32 = arith.constant 1 : i32 // CHECK: fir.store %c1_i32 to %[[ARG_1]] fir.store %c1_i32 to %arg3 : !fir.ref diff --git a/flang/test/Transforms/debug-omp-target-op-1.fir b/flang/test/Transforms/debug-omp-target-op-1.fir index 6b895b732c42b..08e4a52bf6586 100644 --- a/flang/test/Transforms/debug-omp-target-op-1.fir +++ b/flang/test/Transforms/debug-omp-target-op-1.fir @@ -14,7 +14,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<>} { %3 = fircg.ext_declare %2 {uniq_name = "_QFEy"} : (!fir.ref) -> !fir.ref loc(#loc2) %4 = omp.map.info var_ptr(%1 : !fir.ref, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref {name = "x"} %5 = omp.map.info var_ptr(%3 : !fir.ref, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref {name = "y"} - omp.target map_entries(%4 -> %arg0, %5 -> %arg1 : !fir.ref, !fir.ref) { + omp.target kernel_type(generic) map_entries(%4 -> %arg0, %5 -> %arg1 : !fir.ref, !fir.ref) { %16 = fircg.ext_declare %arg0 {uniq_name = "_QFEx"} : (!fir.ref) -> !fir.ref loc(#loc3) %17 = fircg.ext_declare %arg1 {uniq_name = "_QFEy"} : (!fir.ref) -> !fir.ref loc(#loc4) omp.terminator diff --git a/flang/test/Transforms/debug-omp-target-op-2.fir b/flang/test/Transforms/debug-omp-target-op-2.fir index 15dcf2389b21d..f1d11f2fba84f 100644 --- a/flang/test/Transforms/debug-omp-target-op-2.fir +++ 
b/flang/test/Transforms/debug-omp-target-op-2.fir @@ -25,7 +25,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<>} { %24 = omp.map.info var_ptr(%15 : !fir.ref>, i32) map_clauses(tofrom) capture(ByRef) bounds(%19, %23) -> !fir.ref> {name = "b"} %25 = omp.map.info var_ptr(%1 : !fir.ref, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !fir.ref {name = ""} %26 = omp.map.info var_ptr(%0 : !fir.ref, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !fir.ref {name = ""} - omp.target map_entries(%24 -> %arg3, %25 -> %arg4, %26 -> %arg5 : !fir.ref>, !fir.ref, !fir.ref) { + omp.target kernel_type(generic) map_entries(%24 -> %arg3, %25 -> %arg4, %26 -> %arg5 : !fir.ref>, !fir.ref, !fir.ref) { %27 = fir.load %arg5 : !fir.ref %28 = fir.load %arg4 : !fir.ref %29 = fir.convert %27 : (i32) -> index diff --git a/flang/test/Transforms/omp-function-filtering-todo.mlir b/flang/test/Transforms/omp-function-filtering-todo.mlir index c5640bb9757f7..cd45de88edfa9 100644 --- a/flang/test/Transforms/omp-function-filtering-todo.mlir +++ b/flang/test/Transforms/omp-function-filtering-todo.mlir @@ -16,7 +16,7 @@ module attributes {omp.is_gpu = true, omp.is_target_device = true} { func.func @foo(%ia : !fir.ref>>>) { %ia.map = omp.map.info var_ptr(%ia : !fir.ref>>>, !fir.box>>) map_clauses(always, implicit, to) capture(ByRef) -> !fir.ref>>> {name = "ia"} - omp.target map_entries(%ia.map -> %arg0 : !fir.ref>>>) { + omp.target kernel_type(spmd) map_entries(%ia.map -> %arg0 : !fir.ref>>>) { omp.parallel { %c1_i32 = arith.constant 1 : i32 omp.wsloop reduction(byref @add_reduction_byref_box_heap_Uxi32 %arg0 -> %arg1 : !fir.ref>>>) { diff --git a/flang/test/Transforms/omp-map-info-finalization-implicit-field.fir b/flang/test/Transforms/omp-map-info-finalization-implicit-field.fir index d3e8125d2ee3d..72230ccd96126 100644 --- a/flang/test/Transforms/omp-map-info-finalization-implicit-field.fir +++ 
b/flang/test/Transforms/omp-map-info-finalization-implicit-field.fir @@ -26,7 +26,7 @@ func.func @_QQmain() { %7:2 = hlfir.declare %6 {uniq_name = "_QFEdst_record"} : (!fir.ref) -> (!fir.ref, !fir.ref) %16 = omp.map.info var_ptr(%7#1 : !fir.ref, !record_t) map_clauses(implicit, tofrom) capture(ByRef) -> !fir.ref {name = "dst_record"} %17 = omp.map.info var_ptr(%7#1 : !fir.ref, !record_t) map_clauses(implicit, tofrom) capture(ByRef) mapper(@record_mapper) -> !fir.ref {name = "dst_record_with_mapper"} - omp.target map_entries(%16 -> %arg0, %17 -> %arg1 : !fir.ref, !fir.ref) { + omp.target kernel_type(generic) map_entries(%16 -> %arg0, %17 -> %arg1 : !fir.ref, !fir.ref) { %20:2 = hlfir.declare %arg0 {uniq_name = "_QFEdst_record"} : (!fir.ref) -> (!fir.ref, !fir.ref) %21:2 = hlfir.declare %arg1 {uniq_name = "_QFEdst_record"} : (!fir.ref) -> (!fir.ref, !fir.ref) @@ -80,7 +80,7 @@ func.func @_QQmain() { // CHECK-SAME: members(%{{.*}}, %{{.*}} : [1], [1, 0] : {{.*}}) -> {{.*}}> {name = // CHECK-SAME: "dst_record_with_mapper"} -// CHECK: omp.target map_entries( +// CHECK: omp.target kernel_type(generic) map_entries( // CHECK-SAME: %[[RECORD_MAP]] -> %{{[^[:space:]]+}}, // CHECK-SAME: %[[FIELD_MAP]] -> %{{[^[:space:]]+}}, // CHECK-SAME: %[[FIELD_BASE_ADDR_MAP]] -> %{{[^[:space:]]+}} diff --git a/flang/test/Transforms/omp-map-info-finalization.fir b/flang/test/Transforms/omp-map-info-finalization.fir index a808b81e71356..2feda21b0e351 100644 --- a/flang/test/Transforms/omp-map-info-finalization.fir +++ b/flang/test/Transforms/omp-map-info-finalization.fir @@ -18,7 +18,7 @@ func.func @test_descriptor_expansion_pass(%arg0: !fir.box>) { %7 = fir.box_addr %2#1 : (!fir.box>) -> !fir.ref> %8 = omp.map.info var_ptr(%4#1 : !fir.ref>>, !fir.box>) map_clauses(tofrom) capture(ByRef) -> !fir.ref>> %9 = omp.map.info var_ptr(%7 : !fir.ref>, !fir.array) map_clauses(from) capture(ByRef) bounds(%bounds) -> !fir.ref> - omp.target map_entries(%8 -> %arg1, %9 -> %arg2 : !fir.ref>>, !fir.ref>) { 
+ omp.target kernel_type(generic) map_entries(%8 -> %arg1, %9 -> %arg2 : !fir.ref>>, !fir.ref>) { omp.terminator } return @@ -37,7 +37,7 @@ func.func @test_descriptor_expansion_pass(%arg0: !fir.box>) { // CHECK: %[[BASE_ADDR_OFF_2:.*]] = fir.box_offset %[[ALLOCA]] base_addr : (!fir.ref>>) -> !fir.llvm_ptr>> // CHECK: %[[DESC_MEMBER_MAP_2:.*]] = omp.map.info var_ptr(%[[ALLOCA]] : !fir.ref>>, i32) map_clauses(from) capture(ByRef) var_ptr_ptr(%[[BASE_ADDR_OFF_2]] : !fir.llvm_ptr>>) bounds(%[[BOUNDS]]) -> !fir.llvm_ptr>> {name = ""} // CHECK: %[[DESC_PARENT_MAP_2:.*]] = omp.map.info var_ptr(%[[ALLOCA]] : !fir.ref>>, !fir.box>) map_clauses(always, to) capture(ByRef) members(%[[DESC_MEMBER_MAP_2]] : [0] : !fir.llvm_ptr>>) -> !fir.ref> -// CHECK: omp.target map_entries(%[[DESC_PARENT_MAP]] -> %[[ARG1:.*]], %[[DESC_PARENT_MAP_2]] -> %[[ARG2:.*]], %[[DESC_MEMBER_MAP]] -> %[[ARG3:.*]], %[[DESC_MEMBER_MAP_2]] -> %[[ARG4:.*]] : {{.*}}) { +// CHECK: omp.target kernel_type(generic) map_entries(%[[DESC_PARENT_MAP]] -> %[[ARG1:.*]], %[[DESC_PARENT_MAP_2]] -> %[[ARG2:.*]], %[[DESC_MEMBER_MAP]] -> %[[ARG3:.*]], %[[DESC_MEMBER_MAP_2]] -> %[[ARG4:.*]] : {{.*}}) { // ----- @@ -47,7 +47,7 @@ func.func @test_derived_type_map_operand_and_block_addition(%arg0: !fir.ref,nested:!fir.box>>,ry:f32}>>) -> !fir.ref %3 = omp.map.info var_ptr(%2 : !fir.ref, f32) map_clauses(from) capture(ByRef) -> !fir.ref {name = "scalar_struct%ry"} %4 = omp.map.info var_ptr(%arg0 : !fir.ref,nested:!fir.box>>,ry:f32}>>, !fir.type<_QFTdtype{ix:i32,rx:f32,zx:complex,nested:!fir.box>>,ry:f32}>) map_clauses(from) capture(ByRef) members(%1, %3 : [1], [4] : !fir.ref, !fir.ref) -> !fir.ref,nested:!fir.box>>,ry:f32}>> {name = "scalar_struct", partial_map = true} - omp.target map_entries(%4 -> %arg1 : !fir.ref,nested:!fir.box>>,ry:f32}>>) { + omp.target kernel_type(generic) map_entries(%4 -> %arg1 : !fir.ref,nested:!fir.box>>,ry:f32}>>) { omp.terminator } return @@ -57,7 +57,7 @@ func.func 
@test_derived_type_map_operand_and_block_addition(%arg0: !fir.ref, f32) map_clauses(from) capture(ByRef) -> !fir.ref {name = "scalar_struct%rx"} // CHECK: %[[MAP_MEMBER_2:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref, f32) map_clauses(from) capture(ByRef) -> !fir.ref {name = "scalar_struct%ry"} // CHECK: %[[MAP_PARENT:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref,nested:!fir.box>>,ry:f32}>>, !fir.type<_QFTdtype{ix:i32,rx:f32,zx:complex,nested:!fir.box>>,ry:f32}>) map_clauses(from) capture(ByRef) members(%[[MAP_MEMBER_1]], %[[MAP_MEMBER_2]] : [1], [4] : !fir.ref, !fir.ref) -> !fir.ref,nested:!fir.box>>,ry:f32}>> {name = "scalar_struct", partial_map = true} -// CHECK: omp.target map_entries(%[[MAP_PARENT]] -> %[[ARG1:.*]], %[[MAP_MEMBER_1]] -> %[[ARG2:.*]], %[[MAP_MEMBER_2]] -> %[[ARG3:.*]] : !fir.ref,nested:!fir.box>>,ry:f32}>>, !fir.ref, !fir.ref) { +// CHECK: omp.target kernel_type(generic) map_entries(%[[MAP_PARENT]] -> %[[ARG1:.*]], %[[MAP_MEMBER_1]] -> %[[ARG2:.*]], %[[MAP_MEMBER_2]] -> %[[ARG3:.*]] : !fir.ref,nested:!fir.box>>,ry:f32}>>, !fir.ref, !fir.ref) { // ----- @@ -70,7 +70,7 @@ func.func @test_nested_derived_type_map_operand_and_block_addition(%arg0: !fir.r %9 = fir.coordinate_of %7, r : (!fir.ref>) -> !fir.ref %10 = omp.map.info var_ptr(%9 : !fir.ref, f32) map_clauses(tofrom) capture(ByRef) -> !fir.ref {name = "sa%n%r"} %11 = omp.map.info var_ptr(%0 : !fir.ref}>>, !fir.type<_QFmaptype_derived_nested_explicit_multiple_membersTscalar_and_array{r:f32,n:!fir.type<_QFmaptype_derived_nested_explicit_multiple_membersTnested{i:i32,r:f32}>}>) map_clauses(tofrom) capture(ByRef) members(%5, %10 : [1,0], [1,1] : !fir.ref, !fir.ref) -> !fir.ref}>> {name = "sa", partial_map = true} - omp.target map_entries(%11 -> %arg1 : !fir.ref}>>) { + omp.target kernel_type(generic) map_entries(%11 -> %arg1 : !fir.ref}>>) { omp.terminator } return @@ -80,7 +80,7 @@ func.func @test_nested_derived_type_map_operand_and_block_addition(%arg0: !fir.r // CHECK: %[[MAP_MEMBER_1:.*]] = 
omp.map.info var_ptr(%{{.*}} : !fir.ref, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref {name = "sa%n%i"} // CHECK: %[[MAP_MEMBER_2:.*]] = omp.map.info var_ptr(%{{.*}} : !fir.ref, f32) map_clauses(tofrom) capture(ByRef) -> !fir.ref {name = "sa%n%r"} // CHECK: %[[MAP_PARENT:.*]] = omp.map.info var_ptr(%{{.*}} : {{.*}}, {{.*}}) map_clauses(tofrom) capture(ByRef) members(%[[MAP_MEMBER_1]], %[[MAP_MEMBER_2]] : [1, 0], [1, 1] : !fir.ref, !fir.ref) -> {{.*}} {name = "sa", partial_map = true} -// CHECK: omp.target map_entries(%[[MAP_PARENT]] -> %[[ARG1:.*]], %[[MAP_MEMBER_1]] -> %[[ARG2:.*]], %[[MAP_MEMBER_2]] -> %[[ARG3:.*]] : {{.*}}, !fir.ref, !fir.ref) { +// CHECK: omp.target kernel_type(generic) map_entries(%[[MAP_PARENT]] -> %[[ARG1:.*]], %[[MAP_MEMBER_1]] -> %[[ARG2:.*]], %[[MAP_MEMBER_2]] -> %[[ARG3:.*]] : {{.*}}, !fir.ref, !fir.ref) { // ----- @@ -100,7 +100,7 @@ func.func @dtype_alloca_op_block_add(%arg0: !fir.ref>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32}>>) -> !fir.ref>>> %8 = omp.map.info var_ptr(%7 : !fir.ref>>>, !fir.box>>) map_clauses(tofrom) capture(ByRef) bounds(%6) -> !fir.ref>>> {name = "one_l%array_j"} %9 = omp.map.info var_ptr(%0#0 : !fir.ref>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32}>>, !fir.type<_QFtest_derived_type_allocatable_map_operand_and_block_additionTone_layer{i:f32,scalar:!fir.box>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32}>) map_clauses(tofrom) capture(ByRef) members(%8 : [4] : !fir.ref>>>) -> !fir.ref>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32}>> {name = "one_l", partial_map = true} - omp.target map_entries(%9 -> %arg1 : !fir.ref>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32}>>) { + omp.target kernel_type(generic) map_entries(%9 -> %arg1 : !fir.ref>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32}>>) { omp.terminator } return @@ -114,7 +114,7 @@ func.func @dtype_alloca_op_block_add(%arg0: !fir.ref>>>, i32) map_clauses(tofrom) capture(ByRef) 
var_ptr_ptr(%[[MEMBER_BASE_ADDR]] : !fir.llvm_ptr>>) bounds(%[[BOUNDS]]) -> !fir.llvm_ptr>> {{.*}} // CHECK: %[[MAP_MEMBER_DESCRIPTOR:.*]] = omp.map.info var_ptr(%[[MEMBER_COORD]] : !fir.ref>>>, !fir.box>>) map_clauses(always, to) capture(ByRef) -> !fir.ref>>> {name = "one_l%array_j"} // CHECK: %[[MAP_MEMBER_PARENT:.*]] = omp.map.info var_ptr(%[[ALLOCA]]#0 : !fir.ref<[[REC_TY]]>>, [[REC_TY]]>) map_clauses(tofrom) capture(ByRef) members(%10, %9 : [4], [4, 0] : !fir.ref>>>, !fir.llvm_ptr>>) -> !fir.ref<[[REC_TY]]>> {{.*}} -// CHECK: omp.target map_entries(%[[MAP_MEMBER_PARENT]] -> %[[ARG1:.*]], %[[MAP_MEMBER_DESCRIPTOR]] -> %[[ARG2:.*]], %[[MAP_MEMBER_BASE_ADDR]] -> %[[ARG3:.*]] : !fir.ref<[[REC_TY]]>>, !fir.ref>>>, !fir.llvm_ptr>>) { +// CHECK: omp.target kernel_type(generic) map_entries(%[[MAP_MEMBER_PARENT]] -> %[[ARG1:.*]], %[[MAP_MEMBER_DESCRIPTOR]] -> %[[ARG2:.*]], %[[MAP_MEMBER_BASE_ADDR]] -> %[[ARG3:.*]] : !fir.ref<[[REC_TY]]>>, !fir.ref>>>, !fir.llvm_ptr>>) { // ----- @@ -139,7 +139,7 @@ func.func @alloca_dtype_map_op_block_add(%arg0 : !fir.ref>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32}>>>) -> !fir.ref %14 = omp.map.info var_ptr(%13 : !fir.ref, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref {name = "one_l%k"} %15 = omp.map.info var_ptr(%0#1 : !fir.ref>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32}>>>>, !fir.box>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32}>>>) map_clauses(tofrom) capture(ByRef) members(%11, %14 : [4], [5] : !fir.ref>>>, !fir.ref) -> !fir.ref>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32}>>>> {name = "one_l", partial_map = true} - omp.target map_entries(%15 -> %arg1 : !fir.ref>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32}>>>>) { + omp.target kernel_type(generic) map_entries(%15 -> %arg1 : !fir.ref>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32}>>>>) { omp.terminator } return @@ -159,7 +159,7 @@ func.func @alloca_dtype_map_op_block_add(%arg0 : !fir.ref>>>) 
-> !fir.llvm_ptr>> // CHECK: %[[MAP_ALLOCA_PARENT_BASE_ADDR:.*]] = omp.map.info var_ptr(%[[ALLOCA]]#1 : !fir.ref>>>, !fir.type<[[REC_TY]]>) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%[[ALLOCATABLE_PARENT_BASE_ADDR]] : !fir.llvm_ptr>>) -> !fir.llvm_ptr>> {{.*}} // CHECK: %[[MAP_PARENT_DESCRIPTOR:.*]] = omp.map.info var_ptr(%[[ALLOCA]]#1 : !fir.ref>>>, !fir.box>>) map_clauses(always, to) capture(ByRef) members(%18, %13, %12, %16 : [0], [0, 4], [0, 4, 0], [0, 5] : !fir.llvm_ptr>>, !fir.ref>>>, !fir.llvm_ptr>>, !fir.ref) -> !fir.ref>>> {{.*}} -// CHECK: omp.target map_entries(%[[MAP_PARENT_DESCRIPTOR]] -> %[[ARG1:.*]], %[[MAP_ALLOCA_PARENT_BASE_ADDR]] -> %[[ARG2:.*]], %[[MAP_ALLOCA_MEMBER_DESCRIPTOR]] -> %[[ARG3:.*]], %[[MAP_ALLOCA_MEMBER_BASE_ADDR]] -> %[[ARG4:.*]], %[[MAP_REGULAR_MEMBER]] -> %[[ARG5:.*]] : !fir.ref>>>, !fir.llvm_ptr>>, !fir.ref>>>, !fir.llvm_ptr>>, !fir.ref) { +// CHECK: omp.target kernel_type(generic) map_entries(%[[MAP_PARENT_DESCRIPTOR]] -> %[[ARG1:.*]], %[[MAP_ALLOCA_PARENT_BASE_ADDR]] -> %[[ARG2:.*]], %[[MAP_ALLOCA_MEMBER_DESCRIPTOR]] -> %[[ARG3:.*]], %[[MAP_ALLOCA_MEMBER_BASE_ADDR]] -> %[[ARG4:.*]], %[[MAP_REGULAR_MEMBER]] -> %[[ARG5:.*]] : !fir.ref>>>, !fir.llvm_ptr>>, !fir.ref>>>, !fir.llvm_ptr>>, !fir.ref) { // ----- @@ -188,7 +188,7 @@ func.func @alloca_dtype_map_op_block_add(%arg0 : !fir.ref,array_k:!fir.box>>,k:i32}>>) -> !fir.ref %17 = omp.map.info var_ptr(%16 : !fir.ref, i32) map_clauses(tofrom) capture(ByRef) -> !fir.ref {name = "one_l%nest%k"} %18 = omp.map.info var_ptr(%0#1 : !fir.ref>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32,nest:!fir.type<_QFtest_alloca_nested_derived_type_map_operand_and_block_additionTmiddle_layer{i:f32,array_i:!fir.array<10xi32>,array_k:!fir.box>>,k:i32}>}>>>>, !fir.box>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32,nest:!fir.type<_QFtest_alloca_nested_derived_type_map_operand_and_block_additionTmiddle_layer{i:f32,array_i:!fir.array<10xi32>,array_k:!fir.box>>,k:i32}>}>>>) 
map_clauses(tofrom) capture(ByRef) members(%13, %17 : [6,2], [6,3] : !fir.ref>>>, !fir.ref) -> !fir.ref>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32,nest:!fir.type<_QFtest_alloca_nested_derived_type_map_operand_and_block_additionTmiddle_layer{i:f32,array_i:!fir.array<10xi32>,array_k:!fir.box>>,k:i32}>}>>>> {name = "one_l", partial_map = true} - omp.target map_entries(%18 -> %arg1 : !fir.ref>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32,nest:!fir.type<_QFtest_alloca_nested_derived_type_map_operand_and_block_additionTmiddle_layer{i:f32,array_i:!fir.array<10xi32>,array_k:!fir.box>>,k:i32}>}>>>>) { + omp.target kernel_type(generic) map_entries(%18 -> %arg1 : !fir.ref>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32,nest:!fir.type<_QFtest_alloca_nested_derived_type_map_operand_and_block_additionTmiddle_layer{i:f32,array_i:!fir.array<10xi32>,array_k:!fir.box>>,k:i32}>}>>>>) { omp.terminator } return @@ -210,7 +210,7 @@ func.func @alloca_dtype_map_op_block_add(%arg0 : !fir.ref>>>) -> !fir.llvm_ptr>> // CHECK: %[[MAP_ALLOCATABLE_PARENT_BASE_ADDR:.*]] = omp.map.info var_ptr(%[[ALLOCA]]#1 : !fir.ref>>>, !fir.type<[[REC_TY]]>) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%[[ALLOCATABLE_PARENT_BASE_ADDR]] : !fir.llvm_ptr>>) -> !fir.llvm_ptr>> {{.*}} // CHECK: %[[MAP_ALLOCATABLE_PARENT_DESCRIPTOR:.*]] = omp.map.info var_ptr(%[[ALLOCA]]#1 : !fir.ref>>>, !fir.box>>) map_clauses(always, to) capture(ByRef) members(%21, %15, %14, %19 : [0], [0, 6, 2], [0, 6, 2, 0], [0, 6, 3] : !fir.llvm_ptr>>, !fir.ref>>>, !fir.llvm_ptr>>, !fir.ref) -> !fir.ref>>> {{.*}} -// CHECK: omp.target map_entries(%[[MAP_ALLOCATABLE_PARENT_DESCRIPTOR]] -> %[[ARG1:.*]], %[[MAP_ALLOCATABLE_PARENT_BASE_ADDR]] -> %[[ARG2:.*]], %[[MAP_NESTED_ALLOCA_MEMBER]] -> %[[ARG3:.*]], %[[MAP_NESTED_ALLOCA_MEMBER_BASE_ADDR]] -> %[[ARG4:.*]], %[[MAP_NESTED_REGULAR_MEMBER]] -> %[[ARG5:.*]] : !fir.ref>>>, !fir.llvm_ptr>>, !fir.ref>>>, !fir.llvm_ptr>>, !fir.ref) { +// CHECK: omp.target 
kernel_type(generic) map_entries(%[[MAP_ALLOCATABLE_PARENT_DESCRIPTOR]] -> %[[ARG1:.*]], %[[MAP_ALLOCATABLE_PARENT_BASE_ADDR]] -> %[[ARG2:.*]], %[[MAP_NESTED_ALLOCA_MEMBER]] -> %[[ARG3:.*]], %[[MAP_NESTED_ALLOCA_MEMBER_BASE_ADDR]] -> %[[ARG4:.*]], %[[MAP_NESTED_REGULAR_MEMBER]] -> %[[ARG5:.*]] : !fir.ref>>>, !fir.llvm_ptr>>, !fir.ref>>>, !fir.llvm_ptr>>, !fir.ref) { // ----- @@ -232,7 +232,7 @@ func.func @alloca_dtype_map_op_block_add(%arg0 : !fir.ref,array_k:!fir.box>>,k:i32}>>) -> !fir.ref>>> %10 = omp.map.info var_ptr(%9 : !fir.ref>>>, !fir.box>>) map_clauses(tofrom) capture(ByRef) bounds(%7) -> !fir.ref>>> {name = "one_l%nest%array_k"} %11 = omp.map.info var_ptr(%0#0 : !fir.ref>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32,nest:!fir.type<_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTmiddle_layer{i:f32,array_i:!fir.array<10xi32>,array_k:!fir.box>>,k:i32}>}>>, !fir.type<_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTtop_layer{i:f32,scalar:!fir.box>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32,nest:!fir.type<_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTmiddle_layer{i:f32,array_i:!fir.array<10xi32>,array_k:!fir.box>>,k:i32}>}>) map_clauses(tofrom) capture(ByRef) members(%10 : [6,2] : !fir.ref>>>) -> !fir.ref>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32,nest:!fir.type<_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTmiddle_layer{i:f32,array_i:!fir.array<10xi32>,array_k:!fir.box>>,k:i32}>}>> {name = "one_l", partial_map = true} - omp.target map_entries(%11 -> %arg1 : !fir.ref>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32,nest:!fir.type<_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTmiddle_layer{i:f32,array_i:!fir.array<10xi32>,array_k:!fir.box>>,k:i32}>}>>) { + omp.target kernel_type(generic) map_entries(%11 -> %arg1 : 
!fir.ref>,array_i:!fir.array<10xi32>,j:f32,array_j:!fir.box>>,k:i32,nest:!fir.type<_QFtest_nested_derived_type_alloca_map_operand_and_block_additionTmiddle_layer{i:f32,array_i:!fir.array<10xi32>,array_k:!fir.box>>,k:i32}>}>>) { omp.terminator } return @@ -247,7 +247,7 @@ func.func @alloca_dtype_map_op_block_add(%arg0 : !fir.ref>>>, i32) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%[[ALLOCATABLE_MEMBER_BASE_ADDR]] : !fir.llvm_ptr>>) bounds(%[[BOUNDS]]) -> !fir.llvm_ptr>> {{.*}} // CHECK: %[[MAP_ALLOCATABLE_MEMBER_DESCRIPTOR:.*]] = omp.map.info var_ptr(%[[ALLOCATABLE_MEMBER]] : !fir.ref>>>, !fir.box>>) map_clauses(always, to) capture(ByRef) -> !fir.ref>>> {{.*}} // CHECK: %[[MAP_PARENT:.*]] = omp.map.info var_ptr(%[[ALLOCA]]#0 : !fir.ref>, !fir.type<[[REC_TY]]>) map_clauses(tofrom) capture(ByRef) members(%12, %11 : [6, 2], [6, 2, 0] : !fir.ref>>>, !fir.llvm_ptr>>) -> !fir.ref> {{.*}} -// CHECK: omp.target map_entries(%[[MAP_PARENT]] -> %[[ARG1:.*]], %[[MAP_ALLOCATABLE_MEMBER_DESCRIPTOR]] -> %[[ARG2:.*]], %[[MAP_ALLOCATABLE_MEMBER_BASE_ADDR]] -> %[[ARG3:.*]] : !fir.ref>, !fir.ref>>>, !fir.llvm_ptr>>) { +// CHECK: omp.target kernel_type(generic) map_entries(%[[MAP_PARENT]] -> %[[ARG1:.*]], %[[MAP_ALLOCATABLE_MEMBER_DESCRIPTOR]] -> %[[ARG2:.*]], %[[MAP_ALLOCATABLE_MEMBER_BASE_ADDR]] -> %[[ARG3:.*]] : !fir.ref>, !fir.ref>>>, !fir.llvm_ptr>>) { // ----- @@ -267,7 +267,7 @@ func.func @alloca_dtype_map_op_block_add(%arg0 : !fir.ref>>,vertexy:!fir.box>>}>>) -> !fir.ref>>> %10 = omp.map.info var_ptr(%9 : !fir.ref>>>, !fir.box>>) map_clauses(tofrom) capture(ByRef) bounds(%1) -> !fir.ref>>> {name = "alloca_dtype%vertexes(2_8)%vertexy"} %11 = omp.map.info var_ptr(%0#1 : !fir.ref>>,vertexy:!fir.box>>}>>>>,array_i:!fir.array<10xi32>}>>, !fir.type<_QFmaptype_nested_derived_type_member_idxTdtype{i:f32,vertexes:!fir.box>>,vertexy:!fir.box>>}>>>>,array_i:!fir.array<10xi32>}>) map_clauses(storage) capture(ByRef) members(%4, %10 : [1], [1,2] : !fir.ref>>,vertexy:!fir.box>>}>>>>>, 
!fir.ref>>>) -> !fir.ref>>,vertexy:!fir.box>>}>>>>,array_i:!fir.array<10xi32>}>> {name = "alloca_dtype", partial_map = true} - omp.target map_entries(%11 -> %arg1 : !fir.ref>>,vertexy:!fir.box>>}>>>>,array_i:!fir.array<10xi32>}>>) { + omp.target kernel_type(generic) map_entries(%11 -> %arg1 : !fir.ref>>,vertexy:!fir.box>>}>>>>,array_i:!fir.array<10xi32>}>>) { omp.terminator } return @@ -286,7 +286,7 @@ func.func @alloca_dtype_map_op_block_add(%arg0 : !fir.ref>>>, i32) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%[[BASE_ADDR_2]] : !fir.llvm_ptr>>) bounds(%{{.*}}) -> !fir.llvm_ptr>> {{.*}} // CHECK: %[[DESC_MAP_2:.*]] = omp.map.info var_ptr(%[[DESC_2]] : !fir.ref>>>, !fir.box>>) map_clauses(always, to) capture(ByRef) -> !fir.ref>>> {{.*}} // CHECK: %[[TOP_PARENT_MAP:.*]] = omp.map.info var_ptr(%0#1 : !fir.ref>, !fir.type<[[REC_TY]]>) map_clauses(storage) capture(ByRef) members(%6, %5, %14, %13 : [1], [1, 0], [1, 0, 2], [1, 0, 2, 0] : !fir.ref>>>>, !fir.llvm_ptr>>>, !fir.ref>>>, !fir.llvm_ptr>>) -> !fir.ref> {{{.*}} partial_map = true} -// CHECK: omp.target map_entries(%[[TOP_PARENT_MAP]] -> %{{.*}}, %[[DESC_MAP_1]] -> %{{.*}}, %[[BASE_ADDR_MAP_1]] -> %{{.*}}, %[[DESC_MAP_2]] -> %{{.*}}, %[[BASE_ADDR_MAP_2]] -> %{{.*}} : !fir.ref>, !fir.ref>>>>, !fir.llvm_ptr>>>, !fir.ref>>>, !fir.llvm_ptr>>) { +// CHECK: omp.target kernel_type(generic) map_entries(%[[TOP_PARENT_MAP]] -> %{{.*}}, %[[DESC_MAP_1]] -> %{{.*}}, %[[BASE_ADDR_MAP_1]] -> %{{.*}}, %[[DESC_MAP_2]] -> %{{.*}}, %[[BASE_ADDR_MAP_2]] -> %{{.*}} : !fir.ref>, !fir.ref>>>>, !fir.llvm_ptr>>>, !fir.ref>>>, !fir.llvm_ptr>>) { // ----- @@ -353,7 +353,7 @@ func.func @_QPrealtest(%arg0: !fir.boxchar<1>) { %7 = arith.subi %6#1, %c1 : index %8 = omp.map.bounds lower_bound(%c0 : index) upper_bound(%7 : index) extent(%6#1 : index) stride(%c1 : index) start_idx(%c0 : index) {stride_in_bytes = true} %9 = omp.map.info var_ptr(%0 : !fir.ref>, !fir.boxchar<1>) map_clauses(to) capture(ByRef) bounds(%8) -> !fir.ref> - omp.target 
map_entries(%9 -> %arg1 : !fir.ref>) private(@boxchar.privatizer %3#0 -> %arg2 [map_idx=0] : !fir.boxchar<1>) { + omp.target kernel_type(generic) map_entries(%9 -> %arg1 : !fir.ref>) private(@boxchar.privatizer %3#0 -> %arg2 [map_idx=0] : !fir.boxchar<1>) { %10:2 = fir.unboxchar %arg2 : (!fir.boxchar<1>) -> (!fir.ref>, index) %11:2 = hlfir.declare %10#0 typeparams %10#1 {uniq_name = "tgt_a0"} : (!fir.ref>, index) -> (!fir.boxchar<1>, !fir.ref>) omp.terminator @@ -384,7 +384,7 @@ func.func @_QPrealtest(%arg0: !fir.boxchar<1>) { // CHECK: %[[VAL_12:.*]] = fir.box_offset %[[VAL_0]] base_addr : (!fir.ref>) -> !fir.llvm_ptr>> // CHECK: %[[VAL_13:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref>, !fir.char<1,?>) map_clauses(to) capture(ByRef) var_ptr_ptr(%[[VAL_12]] : !fir.llvm_ptr>>) bounds(%[[VAL_10]]) -> !fir.llvm_ptr>> // CHECK: %[[VAL_14:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref>, !fir.boxchar<1>) map_clauses(always, to) capture(ByRef) members(%[[VAL_13]] : [0] : !fir.llvm_ptr>>) -> !fir.ref> -// CHECK: omp.target map_entries(%[[VAL_14]] -> %[[VAL_15:.*]], %[[VAL_13]] -> %[[VAL_16:.*]] : !fir.ref>, !fir.llvm_ptr>>) private(@boxchar.privatizer %[[VAL_3]]#0 -> %[[VAL_17:.*]] [map_idx=0] : !fir.boxchar<1>) { +// CHECK: omp.target kernel_type(generic) map_entries(%[[VAL_14]] -> %[[VAL_15:.*]], %[[VAL_13]] -> %[[VAL_16:.*]] : !fir.ref>, !fir.llvm_ptr>>) private(@boxchar.privatizer %[[VAL_3]]#0 -> %[[VAL_17:.*]] [map_idx=0] : !fir.boxchar<1>) { // CHECK: %[[VAL_18:.*]]:2 = fir.unboxchar %[[VAL_17]] : (!fir.boxchar<1>) -> (!fir.ref>, index) // CHECK: %[[VAL_19:.*]]:2 = hlfir.declare %[[VAL_18]]#0 typeparams %[[VAL_18]]#1 {uniq_name = "tgt_a0"} : (!fir.ref>, index) -> (!fir.boxchar<1>, !fir.ref>) // CHECK: omp.terminator diff --git a/flang/test/Transforms/omp-maps-for-privatized-symbols.fir b/flang/test/Transforms/omp-maps-for-privatized-symbols.fir index 6054c70a2700d..6669f0981cb90 100644 --- a/flang/test/Transforms/omp-maps-for-privatized-symbols.fir +++ 
b/flang/test/Transforms/omp-maps-for-privatized-symbols.fir @@ -25,7 +25,7 @@ module attributes {omp.is_target_device = false} { %c2_i32 = arith.constant 2 : i32 hlfir.assign %c2_i32 to %1#0 : i32, !fir.ref %8 = omp.map.info var_ptr(%1#1 : !fir.ref, i32) map_clauses(to) capture(ByRef) -> !fir.ref {name = "a"} - omp.target map_entries(%8 -> %arg0 : !fir.ref) private(@_QFtarget_simpleEsimple_var_private_ref_box_heap_i32 %5#0 -> %arg1, @_QFtarget_simpleEfp_int_firstprivate_i32 %7#0 -> %arg2 : !fir.ref>>, !fir.ref) { + omp.target kernel_type(generic) map_entries(%8 -> %arg0 : !fir.ref) private(@_QFtarget_simpleEsimple_var_private_ref_box_heap_i32 %5#0 -> %arg1, @_QFtarget_simpleEfp_int_firstprivate_i32 %7#0 -> %arg2 : !fir.ref>>, !fir.ref) { omp.terminator } return @@ -34,4 +34,4 @@ module attributes {omp.is_target_device = false} { // CHECK: %[[MAP0:.*]] = omp.map.info var_ptr({{.*}} : !fir.ref, i32) map_clauses(to) capture(ByRef) -> !fir.ref {name = "a"} // CHECK: %[[MAP1:.*]] = omp.map.info var_ptr({{.*}} : !fir.ref>>, !fir.box>) map_clauses(tofrom) capture(ByRef) -> !fir.ref>> // CHECK: %[[MAP2:.*]] = omp.map.info var_ptr({{.*}} : !fir.ref, i32) map_clauses(to) capture(ByCopy) -> !fir.ref -// CHECK: omp.target map_entries(%[[MAP0]] -> %arg0, %[[MAP1]] -> %arg1, %[[MAP2]] -> %arg2 : !fir.ref, !fir.ref>>, !fir.ref) +// CHECK: omp.target kernel_type(generic) map_entries(%[[MAP0]] -> %arg0, %[[MAP1]] -> %arg1, %[[MAP2]] -> %arg2 : !fir.ref, !fir.ref>>, !fir.ref) diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h index 6a92b136ef51c..35b58375698f6 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauseOperands.h @@ -51,6 +51,12 @@ using DeclareTargetOperands = detail::Clauses; /// representative name. using TargetEnterExitUpdateDataOperands = TargetEnterDataOperands; +/// Extended TargetOperands with kernel_type attribute. 
+struct TargetExtOperands : public TargetOperands { + /// Kernel execution mode for the target region. + TargetExecModeAttr kernelType; +}; + } // namespace omp } // namespace mlir diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td index 9540cbcbe83d0..a82e0aaa18a1c 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPClauses.td @@ -160,32 +160,6 @@ class OpenMP_AllocatorClauseSkip< def OpenMP_AllocatorClause : OpenMP_AllocatorClauseSkip<>; -//===----------------------------------------------------------------------===// -// LLVM OpenMP extension `ompx_bare` clause -//===----------------------------------------------------------------------===// - -class OpenMP_BareClauseSkip< - bit traits = false, bit arguments = false, bit assemblyFormat = false, - bit description = false, bit extraClassDeclaration = false - > : OpenMP_Clause { - let arguments = (ins - UnitAttr:$bare - ); - - let optAssemblyFormat = [{ - `ompx_bare` $bare - }]; - - let description = [{ - `ompx_bare` allows `omp target teams` to be executed on a GPU with an - explicit number of teams and threads. This clause also allows the teams and - threads sizes to have up to 3 dimensions. 
- }]; -} - -def OpenMP_BareClause : OpenMP_BareClauseSkip<>; - //===----------------------------------------------------------------------===// // V5.2: [16.1, 16.2] `cancel-directive-name` clause set //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPEnums.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPEnums.td index bae6620cc916a..652ecd321d934 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPEnums.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPEnums.td @@ -287,7 +287,7 @@ def ScheduleModifierAttr : OpenMP_EnumAttr; def TargetExecModeBare : I32EnumAttrCase<"bare", 0>; def TargetExecModeGeneric : I32EnumAttrCase<"generic", 1>; def TargetExecModeSpmd : I32EnumAttrCase<"spmd", 2>; -def TargetExecModeSpmdNoLoop : I32EnumAttrCase<"no_loop", 3>; +def TargetExecModeSpmdNoLoop : I32EnumAttrCase<"spmd_no_loop", 3>; def TargetExecMode : OpenMP_I32EnumAttr< "TargetExecMode", @@ -298,6 +298,10 @@ def TargetExecMode : OpenMP_I32EnumAttr< TargetExecModeSpmdNoLoop, ]>; +def TargetExecModeAttr : OpenMP_EnumAttr { + let assemblyFormat = "`(` $value `)`"; +} + //===----------------------------------------------------------------------===// // variable_capture_kind enum. //===----------------------------------------------------------------------===// diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index 8661700ec1f01..4d45f285af55f 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -1480,9 +1480,9 @@ def TargetOp : OpenMP_Op<"target", traits = [ OutlineableOpenMPOpInterface ], clauses = [ // TODO: Complete clause list (defaultmap, uses_allocators). 
- OpenMP_AllocateClause, OpenMP_BareClause, OpenMP_DependClause, - OpenMP_DeviceClause, OpenMP_HasDeviceAddrClause, OpenMP_HostEvalClause, - OpenMP_IfClause, OpenMP_InReductionClause, OpenMP_IsDevicePtrClause, + OpenMP_AllocateClause, OpenMP_DependClause, OpenMP_DeviceClause, + OpenMP_HasDeviceAddrClause, OpenMP_HostEvalClause, OpenMP_IfClause, + OpenMP_InReductionClause, OpenMP_IsDevicePtrClause, OpenMP_MapClauseSkip, OpenMP_NowaitClause, OpenMP_PrivateClause, OpenMP_ThreadLimitClause ], singleRegion = true> { @@ -1501,13 +1501,22 @@ def TargetOp : OpenMP_Op<"target", traits = [ (relative to other `map` operands not the whole operands of the operation). For `private` opernads that do not require a map, this value is -1 (which is omitted from the assembly foramt printing). + + The `kernel_type` required attribute reflects how the target region must be + executed, according to the OpenMP construct that it represents. Most + commonly, `spmd` is used to represent `target teams distribute parallel + do/for` or semantically equivalent constructs and `generic` is used for + everything else. `spmd_no_loop` is an optimization of `spmd` mode used for + the same kinds of constructs, and `bare` is used to implement the + `ompx_bare` extension to `target teams` constructs. }] # clausesDescription; let arguments = !con(clausesArgs, - (ins OptionalAttr:$private_maps)); + (ins OptionalAttr:$private_maps, + TargetExecModeAttr:$kernel_type)); let builders = [ - OpBuilder<(ins CArg<"const TargetOperands &">:$clauses)> + OpBuilder<(ins CArg<"const TargetExtOperands &">:$clauses)> ]; let extraClassDeclaration = [{ @@ -1537,27 +1546,20 @@ def TargetOp : OpenMP_Op<"target", traits = [ /// operations, the top level one will be the one captured. Operation *getInnermostCapturedOmpOp(); - /// Infers the kernel type (Bare, Generic or SPMD) based on the contents of - /// the target region. 
- /// - /// \param capturedOp result of a still valid (no modifications made to any - /// nested operations) previous call to `getInnermostCapturedOmpOp()`. - /// \param hostEvalTripCount output argument to store whether this kernel - /// wraps a loop whose bounds must be evaluated on the host prior to - /// launching it. - static ::mlir::omp::TargetExecMode - getKernelExecFlags(Operation *capturedOp, - bool *hostEvalTripCount = nullptr); + /// Returns whether this kernel requires host evaluation of loop trip count. + bool hasHostEvalTripCount(); }] # clausesExtraClassDeclaration; - let assemblyFormat = clausesAssemblyFormat # [{ - custom( - $region, $has_device_addr_vars, type($has_device_addr_vars), - $host_eval_vars, type($host_eval_vars), $in_reduction_vars, - type($in_reduction_vars), $in_reduction_byref, $in_reduction_syms, - $map_vars, type($map_vars), $private_vars, type($private_vars), - $private_syms, $private_needs_barrier, $private_maps) attr-dict - }]; + let assemblyFormat = "`kernel_type` `` $kernel_type" # + clausesReqAssemblyFormat # " " # + "oilist(" # clausesOptAssemblyFormat # ")" # [{ + custom( + $region, $has_device_addr_vars, type($has_device_addr_vars), + $host_eval_vars, type($host_eval_vars), $in_reduction_vars, + type($in_reduction_vars), $in_reduction_byref, $in_reduction_syms, + $map_vars, type($map_vars), $private_vars, type($private_vars), + $private_syms, $private_needs_barrier, $private_maps) attr-dict + }]; let hasVerifier = 1; let hasRegionVerifier = 1; diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 8cea85bee15b7..551d1bc642269 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -2250,20 +2250,55 @@ LogicalResult TargetUpdateOp::verify() { //===----------------------------------------------------------------------===// void TargetOp::build(OpBuilder &builder, OperationState &state, - const TargetOperands 
&clauses) { + const TargetExtOperands &clauses) { MLIRContext *ctx = builder.getContext(); // TODO Store clauses in op: allocateVars, allocatorVars, inReductionVars, // inReductionByref, inReductionSyms. TargetOp::build(builder, state, /*allocate_vars=*/{}, /*allocator_vars=*/{}, - clauses.bare, makeArrayAttr(ctx, clauses.dependKinds), - clauses.dependVars, clauses.device, clauses.hasDeviceAddrVars, + makeArrayAttr(ctx, clauses.dependKinds), clauses.dependVars, + clauses.device, clauses.hasDeviceAddrVars, clauses.hostEvalVars, clauses.ifExpr, /*in_reduction_vars=*/{}, /*in_reduction_byref=*/nullptr, /*in_reduction_syms=*/nullptr, clauses.isDevicePtrVars, clauses.mapVars, clauses.nowait, clauses.privateVars, makeArrayAttr(ctx, clauses.privateSyms), clauses.privateNeedsBarrier, clauses.threadLimitVars, - /*private_maps=*/nullptr); + /*private_maps=*/nullptr, clauses.kernelType); +} + +bool TargetOp::hasHostEvalTripCount() { + TargetExecMode mode = getKernelType(); + if (mode == TargetExecMode::spmd || mode == TargetExecMode::spmd_no_loop) + return true; + + if (mode == TargetExecMode::bare) + return false; + + // If it represents a `target teams distribute` construct, also evaluate the + // `distribute` trip count on the host. 
+ Operation *capturedOp = getInnermostCapturedOmpOp(); + if (auto loopNestOp = dyn_cast_if_present(capturedOp)) { + SmallVector loopWrappers; + loopNestOp.gatherWrappers(loopWrappers); + + LoopWrapperInterface *innermostWrapper = loopWrappers.begin(); + if (isa(innermostWrapper)) + innermostWrapper = std::next(innermostWrapper); + + auto numWrappers = std::distance(innermostWrapper, loopWrappers.end()); + if (numWrappers != 1) + return false; + + if (!isa(innermostWrapper)) + return false; + + Operation *parentOp = innermostWrapper->getOperation()->getParentOp(); + if (isa_and_present(parentOp) && + parentOp->getParentOp() == getOperation()) + return true; + } + + return false; } LogicalResult TargetOp::verify() { @@ -2282,15 +2317,40 @@ LogicalResult TargetOp::verify() { LogicalResult TargetOp::verifyRegions() { auto teamsOps = getOps(); - if (std::distance(teamsOps.begin(), teamsOps.end()) > 1) + auto numNestedTeams = std::distance(teamsOps.begin(), teamsOps.end()); + if (numNestedTeams > 1) return emitError("target containing multiple 'omp.teams' nested ops"); + if (numNestedTeams == 0 && getKernelType() == TargetExecMode::bare) + return emitOpError() + << "bare kernel must contain a nested 'omp.teams' operation"; + + if (getKernelType() == TargetExecMode::spmd || + getKernelType() == TargetExecMode::spmd_no_loop) { + bool containsLoop = getRegion() + .walk([](LoopNestOp loopOp) { + return WalkResult::interrupt(); + }) + .wasInterrupted(); + if (!containsLoop) + return emitOpError() + << "SPMD kernel must contain a nested 'omp.loop_nest' operation"; + } + + bool isTargetDevice = false; + if (auto offloadMod = (*this)->getParentOfType()) + if (offloadMod.getIsTargetDevice()) + isTargetDevice = true; + // Check that host_eval values are only used in legal ways. 
- bool hostEvalTripCount; - Operation *capturedOp = getInnermostCapturedOmpOp(); - TargetExecMode execMode = getKernelExecFlags(capturedOp, &hostEvalTripCount); - for (Value hostEvalArg : - cast(getOperation()).getHostEvalBlockArgs()) { + llvm::ArrayRef hostEvalBlockArgs = + cast(getOperation()).getHostEvalBlockArgs(); + + if (!hostEvalBlockArgs.empty() && isTargetDevice) + return emitOpError() << "'host_eval' is only supported during host compilation"; + + bool hostEvalTripCount = hasHostEvalTripCount(); + for (Value hostEvalArg : hostEvalBlockArgs) { for (Operation *user : hostEvalArg.getUsers()) { if (auto teamsOp = dyn_cast(user)) { // Check if used in num_teams_lower or any of num_teams_upper_vars @@ -2303,17 +2363,15 @@ LogicalResult TargetOp::verifyRegions() { "and 'thread_limit' in 'omp.teams'"; } if (auto parallelOp = dyn_cast(user)) { - if (execMode == TargetExecMode::spmd && - parallelOp->isAncestor(capturedOp) && - llvm::is_contained(parallelOp.getNumThreadsVars(), hostEvalArg)) + if (llvm::is_contained(parallelOp.getNumThreadsVars(), hostEvalArg)) continue; return emitOpError() << "host_eval argument only legal as 'num_threads' in " - "'omp.parallel' when representing target SPMD"; + "'omp.parallel'"; } if (auto loopNestOp = dyn_cast(user)) { - if (hostEvalTripCount && loopNestOp.getOperation() == capturedOp && + if (hostEvalTripCount && (llvm::is_contained(loopNestOp.getLoopLowerBounds(), hostEvalArg) || llvm::is_contained(loopNestOp.getLoopUpperBounds(), hostEvalArg) || llvm::is_contained(loopNestOp.getLoopSteps(), hostEvalArg))) @@ -2328,6 +2386,19 @@ LogicalResult TargetOp::verifyRegions() { << user->getName() << "' operation"; } } + + if (hostEvalTripCount && !isTargetDevice) { + if (auto loopOp = dyn_cast(getInnermostCapturedOmpOp())) { + for (auto arg : llvm::concat(loopOp.getLoopLowerBounds(), + loopOp.getLoopUpperBounds(), + loopOp.getLoopSteps())) { + if (!llvm::is_contained(hostEvalBlockArgs, arg)) + return emitOpError() << "nested 'omp.loop_nest' 
bounds expected to " + "be host-evaluated"; + } + } + } + return success(); } @@ -2391,10 +2462,24 @@ findCapturedOmpOp(Operation *rootOp, bool checkSingleMandatoryExec, } Operation *TargetOp::getInnermostCapturedOmpOp() { + // If this is an SPMD kernel, then just attempt to find the first available + // omp.loop_nest. If the kernel type has been properly set, that must be the + // captured loop. + if (getKernelType() == TargetExecMode::spmd || + getKernelType() == TargetExecMode::spmd_no_loop) { + Operation *spmdLoop = nullptr; + getRegion().walk([&spmdLoop](LoopNestOp loopOp) { + spmdLoop = loopOp.getOperation(); + return WalkResult::interrupt(); + }); + assert(spmdLoop && "SPMD target regions must contain a loop"); + return spmdLoop; + } + auto *ompDialect = getContext()->getLoadedDialect(); - // Only allow OpenMP terminators and non-OpenMP ops that have known memory - // effects, but don't include a memory write effect. + // Only allow OpenMP terminators and non-OpenMP ops that either have known + // memory effects excluding memory write effects, or are pure. return findCapturedOmpOp( *this, /*checkSingleMandatoryExec=*/true, [&](Operation *sibling) { if (!sibling) @@ -2414,123 +2499,10 @@ Operation *TargetOp::getInnermostCapturedOmpOp() { SideEffects::DefaultResource>(effect.getResource()); }); } - return true; + return isPure(sibling); }); } -/// Check if we can promote SPMD kernel to No-Loop kernel. -static bool canPromoteToNoLoop(Operation *capturedOp, TeamsOp teamsOp, - WsloopOp *wsLoopOp) { - // num_teams clause can break no-loop teams/threads assumption. - if (!teamsOp.getNumTeamsUpperVars().empty()) - return false; - - // Reduction kernels are slower in no-loop mode. - if (teamsOp.getNumReductionVars()) - return false; - if (wsLoopOp->getNumReductionVars()) - return false; - - // Check if the user allows the promotion of kernels to no-loop mode. 
- OffloadModuleInterface offloadMod = - capturedOp->getParentOfType(); - if (!offloadMod) - return false; - auto ompFlags = offloadMod.getFlags(); - if (!ompFlags) - return false; - return ompFlags.getAssumeTeamsOversubscription() && - ompFlags.getAssumeThreadsOversubscription(); -} - -TargetExecMode TargetOp::getKernelExecFlags(Operation *capturedOp, - bool *hostEvalTripCount) { - // TODO: Support detection of bare kernel mode. - // A non-null captured op is only valid if it resides inside of a TargetOp - // and is the result of calling getInnermostCapturedOmpOp() on it. - TargetOp targetOp = - capturedOp ? capturedOp->getParentOfType() : nullptr; - assert((!capturedOp || - (targetOp && targetOp.getInnermostCapturedOmpOp() == capturedOp)) && - "unexpected captured op"); - - if (hostEvalTripCount) - *hostEvalTripCount = false; - - // If it's not capturing a loop, it's a default target region. - if (!isa_and_present(capturedOp)) - return TargetExecMode::generic; - - // Get the innermost non-simd loop wrapper. - SmallVector loopWrappers; - cast(capturedOp).gatherWrappers(loopWrappers); - assert(!loopWrappers.empty()); - - LoopWrapperInterface *innermostWrapper = loopWrappers.begin(); - if (isa(innermostWrapper)) - innermostWrapper = std::next(innermostWrapper); - - auto numWrappers = std::distance(innermostWrapper, loopWrappers.end()); - if (numWrappers != 1 && numWrappers != 2) - return TargetExecMode::generic; - - // Detect target-teams-distribute-parallel-wsloop[-simd]. 
- if (numWrappers == 2) { - WsloopOp *wsloopOp = dyn_cast(innermostWrapper); - if (!wsloopOp) - return TargetExecMode::generic; - - innermostWrapper = std::next(innermostWrapper); - if (!isa(innermostWrapper)) - return TargetExecMode::generic; - - Operation *parallelOp = (*innermostWrapper)->getParentOp(); - if (!isa_and_present(parallelOp)) - return TargetExecMode::generic; - - TeamsOp teamsOp = dyn_cast(parallelOp->getParentOp()); - if (!teamsOp) - return TargetExecMode::generic; - - if (teamsOp->getParentOp() == targetOp.getOperation()) { - TargetExecMode result = TargetExecMode::spmd; - if (canPromoteToNoLoop(capturedOp, teamsOp, wsloopOp)) - result = TargetExecMode::no_loop; - if (hostEvalTripCount) - *hostEvalTripCount = true; - return result; - } - } - // Detect target-teams-distribute[-simd] and target-teams-loop. - else if (isa(innermostWrapper)) { - Operation *teamsOp = (*innermostWrapper)->getParentOp(); - if (!isa_and_present(teamsOp)) - return TargetExecMode::generic; - - if (teamsOp->getParentOp() != targetOp.getOperation()) - return TargetExecMode::generic; - - if (hostEvalTripCount) - *hostEvalTripCount = true; - - if (isa(innermostWrapper)) - return TargetExecMode::spmd; - - return TargetExecMode::generic; - } - // Detect target-parallel-wsloop[-simd]. - else if (isa(innermostWrapper)) { - Operation *parallelOp = (*innermostWrapper)->getParentOp(); - if (!isa_and_present(parallelOp)) - return TargetExecMode::generic; - - if (parallelOp->getParentOp() == targetOp.getOperation()) - return TargetExecMode::spmd; - } - - return TargetExecMode::generic; -} - //===----------------------------------------------------------------------===// // ParallelOp //===----------------------------------------------------------------------===// @@ -2695,8 +2667,8 @@ LogicalResult TeamsOp::verify() { // omp.teams construct. The issue is how to support the initialization of // this operation's own arguments (allow SSA values across omp.target?). 
Operation *op = getOperation(); - if (!isa(op->getParentOp()) && - !opInGlobalImplicitParallelRegion(op)) + auto parentTarget = llvm::dyn_cast_if_present(op->getParentOp()); + if (!parentTarget && !opInGlobalImplicitParallelRegion(op)) return emitError("expected to be nested inside of omp.target or not nested " "in any OpenMP dialect operations"); @@ -2705,6 +2677,11 @@ LogicalResult TeamsOp::verify() { this->getNumTeamsUpperVars()))) return failure(); + if (parentTarget && + parentTarget.getKernelType() == TargetExecMode::spmd_no_loop && + (getNumTeamsLower() || !getNumTeamsUpperVars().empty())) + return emitOpError() << "'num_teams' not allowed in SPMD-no-loop kernels"; + // Check for allocate clause restrictions if (getAllocateVars().size() != getAllocatorVars().size()) return emitError( @@ -4320,8 +4297,7 @@ LogicalResult PrivateClauseOp::verifyRegions() { if (region.getNumArguments() != expectedNumArgs) return mlir::emitError(region.getLoc()) - << "`" << regionName << "`: " - << "expected " << expectedNumArgs + << "`" << regionName << "`: " << "expected " << expectedNumArgs << " region arguments, got: " << region.getNumArguments(); for (Block &block : region) { diff --git a/mlir/lib/Dialect/OpenMP/Utils/Utils.cpp b/mlir/lib/Dialect/OpenMP/Utils/Utils.cpp index f5b7aa7ca2e2c..a9edf273e0ddd 100644 --- a/mlir/lib/Dialect/OpenMP/Utils/Utils.cpp +++ b/mlir/lib/Dialect/OpenMP/Utils/Utils.cpp @@ -90,8 +90,7 @@ bool mlir::omp::opInSharedDeviceContext(Operation &op) { targetOp = dyn_cast(op); if (targetOp) { - if (targetOp.getKernelExecFlags(targetOp.getInnermostCapturedOmpOp()) != - omp::TargetExecMode::generic) + if (targetOp.getKernelType() != omp::TargetExecMode::generic) return false; } else { auto declTargetIface = op.getParentOfType(); diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 151a7ae32fd42..b17bc8062636c 100644 --- 
a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -334,7 +334,7 @@ static LogicalResult checkImplementationStatus(Operation &op) { result = todo("allocate"); }; auto checkBare = [&todo](auto op, LogicalResult &result) { - if (op.getBare()) + if (op.getKernelType() == omp::TargetExecMode::bare) result = todo("ompx_bare"); }; auto checkDepend = [&todo](auto op, LogicalResult &result) { @@ -3197,16 +3197,14 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, // Check if we can generate no-loop kernel bool noLoopMode = false; omp::TargetOp targetOp = wsloopOp->getParentOfType(); - if (targetOp) { + if (targetOp && + targetOp.getKernelType() == omp::TargetExecMode::spmd_no_loop) { Operation *targetCapturedOp = targetOp.getInnermostCapturedOmpOp(); // We need this check because, without it, noLoopMode would be set to true // for every omp.wsloop nested inside a no-loop SPMD target region, even if // that loop is not the top-level SPMD one. - if (loopOp == targetCapturedOp) { - if (targetOp.getKernelExecFlags(targetCapturedOp) == - omp::TargetExecMode::no_loop) - noLoopMode = true; - } + if (loopOp == targetCapturedOp) + noLoopMode = true; } llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP = @@ -6485,7 +6483,8 @@ initTargetDefaultAttrs(omp::TargetOp targetOp, Operation *capturedOp, } // Update kernel bounds structure for the `OpenMPIRBuilder` to use. - omp::TargetExecMode execMode = targetOp.getKernelExecFlags(capturedOp); + // Use the kernel_type attribute set by the frontend instead of analyzing IR. 
+ omp::TargetExecMode execMode = targetOp.getKernelType(); switch (execMode) { case omp::TargetExecMode::bare: attrs.ExecFlags = llvm::omp::OMP_TGT_EXEC_MODE_BARE; @@ -6496,7 +6495,7 @@ initTargetDefaultAttrs(omp::TargetOp targetOp, Operation *capturedOp, case omp::TargetExecMode::spmd: attrs.ExecFlags = llvm::omp::OMP_TGT_EXEC_MODE_SPMD; break; - case omp::TargetExecMode::no_loop: + case omp::TargetExecMode::spmd_no_loop: attrs.ExecFlags = llvm::omp::OMP_TGT_EXEC_MODE_SPMD_NO_LOOP; break; } @@ -6556,9 +6555,7 @@ initTargetRuntimeAttrs(llvm::IRBuilderBase &builder, if (numThreads) attrs.MaxThreads = moduleTranslation.lookupValue(numThreads); - bool hostEvalTripCount; - targetOp.getKernelExecFlags(capturedOp, &hostEvalTripCount); - if (hostEvalTripCount) { + if (targetOp.hasHostEvalTripCount()) { llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); attrs.LoopTripCount = nullptr; diff --git a/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir b/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir index b122f425f0752..2b8e0714e8f46 100644 --- a/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir +++ b/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir @@ -267,7 +267,7 @@ llvm.func @_QPomp_target_data_region(%a : !llvm.ptr, %i : !llvm.ptr) { // CHECK: %[[VAL_0:.*]] = llvm.mlir.constant(64 : i32) : i32 // CHECK: %[[MAP1:.*]] = omp.map.info var_ptr(%[[ARG_0]] : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} // CHECK: %[[MAP2:.*]] = omp.map.info var_ptr(%[[ARG_1]] : !llvm.ptr, i32) map_clauses(implicit, storage) capture(ByCopy) -> !llvm.ptr {name = ""} -// CHECK: omp.target thread_limit(%[[VAL_0]] : i32) map_entries(%[[MAP1]] -> %[[BB_ARG0:.*]], %[[MAP2]] -> %[[BB_ARG1:.*]] : !llvm.ptr, !llvm.ptr) { +// CHECK: omp.target kernel_type(generic) thread_limit(%[[VAL_0]] : i32) map_entries(%[[MAP1]] -> %[[BB_ARG0:.*]], %[[MAP2]] -> %[[BB_ARG1:.*]] : !llvm.ptr, !llvm.ptr) { // CHECK: 
%[[VAL_1:.*]] = llvm.mlir.constant(10 : i32) : i32 // CHECK: llvm.store %[[VAL_1]], %[[BB_ARG1]] : i32, !llvm.ptr // CHECK: omp.terminator @@ -279,7 +279,7 @@ llvm.func @_QPomp_target(%a : !llvm.ptr, %i : !llvm.ptr) { %0 = llvm.mlir.constant(64 : i32) : i32 %1 = omp.map.info var_ptr(%a : !llvm.ptr, !llvm.array<1024 x i32>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} %3 = omp.map.info var_ptr(%i : !llvm.ptr, i32) map_clauses(implicit, storage) capture(ByCopy) -> !llvm.ptr {name = ""} - omp.target thread_limit(%0 : i32) map_entries(%1 -> %arg0, %3 -> %arg1 : !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(generic) thread_limit(%0 : i32) map_entries(%1 -> %arg0, %3 -> %arg1 : !llvm.ptr, !llvm.ptr) { %2 = llvm.mlir.constant(10 : i32) : i32 llvm.store %2, %arg1 : i32, !llvm.ptr omp.terminator @@ -481,7 +481,7 @@ llvm.func @sub_() { // CHECK: %[[C_14:.*]] = llvm.mlir.constant(1 : index) : i64 // CHECK: %[[BOUNDS1:.*]] = omp.map.bounds lower_bound(%[[C_12]] : i64) upper_bound(%[[C_11]] : i64) stride(%[[C_14]] : i64) start_idx(%[[C_14]] : i64) // CHECK: %[[MAP1:.*]] = omp.map.info var_ptr(%[[ARG_2]] : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(tofrom) capture(ByRef) bounds(%[[BOUNDS1]]) -> !llvm.ptr {name = ""} -// CHECK: omp.target map_entries(%[[MAP0]] -> %[[BB_ARG0:.*]], %[[MAP1]] -> %[[BB_ARG1:.*]] : !llvm.ptr, !llvm.ptr) { +// CHECK: omp.target kernel_type(generic) map_entries(%[[MAP0]] -> %[[BB_ARG0:.*]], %[[MAP1]] -> %[[BB_ARG1:.*]] : !llvm.ptr, !llvm.ptr) { // CHECK: omp.terminator // CHECK: } // CHECK: llvm.return @@ -500,7 +500,7 @@ llvm.func @_QPtarget_map_with_bounds(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: %9 = llvm.mlir.constant(1 : index) : i64 %10 = omp.map.bounds lower_bound(%7 : i64) upper_bound(%6 : i64) stride(%9 : i64) start_idx(%9 : i64) %11 = omp.map.info var_ptr(%arg2 : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(tofrom) capture(ByRef) bounds(%10) -> !llvm.ptr {name = ""} - omp.target map_entries(%5 -> %arg3, %11 -> %arg4: 
!llvm.ptr, !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%5 -> %arg3, %11 -> %arg4: !llvm.ptr, !llvm.ptr) { omp.terminator } llvm.return diff --git a/mlir/test/Dialect/OpenMP/canonicalize.mlir b/mlir/test/Dialect/OpenMP/canonicalize.mlir index de6c931ecc5fd..cc42f2ed8618c 100644 --- a/mlir/test/Dialect/OpenMP/canonicalize.mlir +++ b/mlir/test/Dialect/OpenMP/canonicalize.mlir @@ -130,7 +130,7 @@ func.func private @foo() -> () // ----- func.func @constant_hoisting_target(%x : !llvm.ptr) { - omp.target { + omp.target kernel_type(generic) { ^bb0(%arg0: !llvm.ptr): %c1 = arith.constant 10 : i32 llvm.store %c1, %arg0 : i32, !llvm.ptr diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir index 0cab769049bac..1dd00102e14e2 100644 --- a/mlir/test/Dialect/OpenMP/invalid.mlir +++ b/mlir/test/Dialect/OpenMP/invalid.mlir @@ -1434,7 +1434,7 @@ func.func @omp_teams_parent() { // ----- func.func @omp_teams_allocate(%data_var : memref) { - omp.target { + omp.target kernel_type(generic) { // expected-error @below {{expected equal sizes for allocate and allocator variables}} "omp.teams" (%data_var) ({ omp.terminator @@ -1447,7 +1447,7 @@ func.func @omp_teams_allocate(%data_var : memref) { // ----- func.func @omp_teams_num_teams1(%lb : i32) { - omp.target { + omp.target kernel_type(generic) { // expected-error @below {{expected exactly one num_teams upper bound when lower bound is specified}} "omp.teams" (%lb) ({ omp.terminator @@ -1460,7 +1460,7 @@ func.func @omp_teams_num_teams1(%lb : i32) { // ----- func.func @omp_teams_num_teams_multidim_with_bounds() { - omp.target { + omp.target kernel_type(generic) { %v0 = arith.constant 1 : i32 %v1 = arith.constant 2 : i32 %lb = arith.constant 3 : i32 @@ -1477,7 +1477,7 @@ func.func @omp_teams_num_teams_multidim_with_bounds() { // ----- func.func @omp_teams_num_teams2(%lb : i32, %ub : i16) { - omp.target { + omp.target kernel_type(generic) { // expected-error @below {{expected num_teams upper 
bound and lower bound to be the same type}} omp.teams num_teams(%lb : i32 to %ub : i16) { omp.terminator @@ -2237,7 +2237,7 @@ func.func @omp_threadprivate() { func.func @omp_target(%map1: memref) { %mapv = omp.map.info var_ptr(%map1 : memref, tensor) map_clauses(delete) capture(ByRef) -> memref {name = ""} // expected-error @below {{to, from, tofrom and alloc map types are permitted}} - omp.target map_entries(%mapv -> %arg0: memref) { + omp.target kernel_type(generic) map_entries(%mapv -> %arg0: memref) { omp.terminator } return @@ -2381,7 +2381,7 @@ func.func @omp_target_update_data_depend(%a: memref) { func.func @omp_target_multiple_teams() { // expected-error @below {{target containing multiple 'omp.teams' nested ops}} - omp.target { + omp.target kernel_type(generic) { omp.teams { omp.terminator } @@ -2395,23 +2395,25 @@ func.func @omp_target_multiple_teams() { // ----- -func.func @omp_target_host_eval(%x : !llvm.ptr) { - // expected-error @below {{op host_eval argument illegal use in 'llvm.load' operation}} - omp.target host_eval(%x -> %arg0 : !llvm.ptr) { - %0 = llvm.load %arg0 : !llvm.ptr -> f32 +module attributes { omp.is_target_device = true } { +func.func @omp_target_host_eval_target_device(%x: i32) { + // expected-error @below {{op 'host_eval' is only supported during host compilation}} + omp.target kernel_type(generic) host_eval(%x -> %arg0 : i32) { + omp.teams num_teams(to %arg0 : i32) { + omp.terminator + } omp.terminator } return } +} // ----- -func.func @omp_target_host_eval_teams(%x : i1) { - // expected-error @below {{op host_eval argument only legal as 'num_teams' and 'thread_limit' in 'omp.teams'}} - omp.target host_eval(%x -> %arg0 : i1) { - omp.teams if(%arg0) { - omp.terminator - } +func.func @omp_target_host_eval(%x : !llvm.ptr) { + // expected-error @below {{op host_eval argument illegal use in 'llvm.load' operation}} + omp.target kernel_type(generic) host_eval(%x -> %arg0 : !llvm.ptr) { + %0 = llvm.load %arg0 : !llvm.ptr -> f32 
omp.terminator } return @@ -2419,10 +2421,10 @@ func.func @omp_target_host_eval_teams(%x : i1) { // ----- -func.func @omp_target_host_eval_parallel(%x : i32) { - // expected-error @below {{op host_eval argument only legal as 'num_threads' in 'omp.parallel' when representing target SPMD}} - omp.target host_eval(%x -> %arg0 : i32) { - omp.parallel num_threads(%arg0 : i32) { +func.func @omp_target_host_eval_teams(%x : i1) { + // expected-error @below {{op host_eval argument only legal as 'num_teams' and 'thread_limit' in 'omp.teams'}} + omp.target kernel_type(generic) host_eval(%x -> %arg0 : i1) { + omp.teams if(%arg0) { omp.terminator } omp.terminator @@ -2434,7 +2436,7 @@ func.func @omp_target_host_eval_parallel(%x : i32) { func.func @omp_target_host_eval_loop1(%x : i32) { // expected-error @below {{op host_eval argument only legal as loop bounds and steps in 'omp.loop_nest' when trip count must be evaluated in the host}} - omp.target host_eval(%x -> %arg0 : i32) { + omp.target kernel_type(generic) host_eval(%x -> %arg0 : i32) { omp.wsloop { omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) { omp.yield @@ -2449,7 +2451,7 @@ func.func @omp_target_host_eval_loop1(%x : i32) { func.func @omp_target_host_eval_loop2(%x : i32) { // expected-error @below {{op host_eval argument only legal as loop bounds and steps in 'omp.loop_nest' when trip count must be evaluated in the host}} - omp.target host_eval(%x -> %arg0 : i32) { + omp.target kernel_type(generic) host_eval(%x -> %arg0 : i32) { omp.teams { ^bb0: %0 = arith.constant 0 : i1 @@ -2471,11 +2473,96 @@ func.func @omp_target_host_eval_loop2(%x : i32) { // ----- +func.func @omp_target_host_eval_loop3(%x : i32) { + // expected-error @below {{op host_eval argument only legal as loop bounds and steps in 'omp.loop_nest' when trip count must be evaluated in the host}} + omp.target kernel_type(bare) host_eval(%x -> %arg0 : i32) { + omp.teams { + omp.wsloop { + omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) { 
+ omp.yield + } + } + omp.terminator + } + omp.terminator + } + return +} + +// ----- + +func.func @omp_target_host_eval_tripcount() { + // expected-error @below {{op nested 'omp.loop_nest' bounds expected to be host-evaluated}} + omp.target kernel_type(spmd) { + %0 = arith.constant 1 : i32 + omp.teams { + omp.loop { + omp.loop_nest (%iv) : i32 = (%0) to (%0) step (%0) { + omp.yield + } + } + omp.terminator + } + omp.terminator + } + return +} + +// ----- + +func.func @omp_target_bare(%x : i32) { + // expected-error @below {{op bare kernel must contain a nested 'omp.teams' operation}} + omp.target kernel_type(bare) { + omp.terminator + } + return +} + +// ----- + +func.func @omp_target_spmd() { + // expected-error @below {{op SPMD kernel must contain a nested 'omp.loop_nest' operation}} + omp.target kernel_type(spmd) { + omp.terminator + } + return +} + +// ----- + +func.func @omp_target_no_loop() { + // expected-error @below {{op SPMD kernel must contain a nested 'omp.loop_nest' operation}} + omp.target kernel_type(spmd_no_loop) { + omp.terminator + } + return +} + +// ----- + +func.func @omp_target_no_loop_num_teams(%x : i32) { + omp.target kernel_type(spmd_no_loop) host_eval(%x -> %arg0 : i32) { + // expected-error @below {{op 'num_teams' not allowed in SPMD-no-loop kernels}} + omp.teams num_teams(to %arg0 : i32) { + omp.loop { + omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) { + omp.yield + } + } + omp.terminator + } + omp.terminator + } + return +} + +// ----- + func.func @omp_target_depend(%data_var: memref) { // expected-error @below {{op expected as many depend values as depend variables}} "omp.target"(%data_var) ({ "omp.terminator"() : () -> () - }) {depend_kinds = [], operandSegmentSizes = array} : (memref) -> () + }) {kernel_type = #omp, depend_kinds = [], operandSegmentSizes = array} : (memref) -> () "func.return"() : () -> () } diff --git a/mlir/test/Dialect/OpenMP/omp-offload-privatization-prepare-by-value.mlir 
b/mlir/test/Dialect/OpenMP/omp-offload-privatization-prepare-by-value.mlir index 8972a083e2c47..793e35ad650d6 100644 --- a/mlir/test/Dialect/OpenMP/omp-offload-privatization-prepare-by-value.mlir +++ b/mlir/test/Dialect/OpenMP/omp-offload-privatization-prepare-by-value.mlir @@ -67,7 +67,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vec %25 = omp.map.info var_ptr(%3 : !llvm.ptr, i8) map_clauses(implicit, to) capture(ByRef) var_ptr_ptr(%24 : !llvm.ptr) bounds(%23) -> !llvm.ptr %26 = omp.map.info var_ptr(%3 : !llvm.ptr, !llvm.struct<(ptr, i64)>) map_clauses(to) capture(ByRef) members(%25 : [0] : !llvm.ptr) -> !llvm.ptr %27 = omp.map.info var_ptr(%1 : !llvm.ptr, i32) map_clauses(to) capture(ByCopy) -> !llvm.ptr - omp.target nowait map_entries(%26 -> %arg1, %27 -> %arg2, %25 -> %arg3 : !llvm.ptr, !llvm.ptr, !llvm.ptr) private(@boxchar_firstprivate %18 -> %arg4 [map_idx=0], @private_eye %1 -> %arg5 [map_idx=1] : !llvm.struct<(ptr, i64)>, !llvm.ptr) { + omp.target kernel_type(generic) nowait map_entries(%26 -> %arg1, %27 -> %arg2, %25 -> %arg3 : !llvm.ptr, !llvm.ptr, !llvm.ptr) private(@boxchar_firstprivate %18 -> %arg4 [map_idx=0], @private_eye %1 -> %arg5 [map_idx=1] : !llvm.struct<(ptr, i64)>, !llvm.ptr) { omp.terminator } llvm.return @@ -112,7 +112,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vec // CHECK: %[[VAL_32:.*]] = omp.map.info var_ptr(%[[HEAP0]] : !llvm.ptr, i8) map_clauses(implicit, to) capture(ByRef) var_ptr_ptr(%[[VAL_31]] : !llvm.ptr) bounds(%[[VAL_25]]) -> !llvm.ptr // CHECK: %[[VAL_33:.*]] = omp.map.info var_ptr(%[[HEAP0]] : !llvm.ptr, !llvm.struct<(ptr, i64)>) map_clauses(to) capture(ByRef) members(%[[VAL_32]] : [0] : !llvm.ptr) -> !llvm.ptr // CHECK: %[[VAL_34:.*]] = llvm.load %[[HEAP0]] : !llvm.ptr -> !llvm.struct<(ptr, i64)> -// CHECK: omp.target depend(taskdependout -> %[[HEAP0]] : !llvm.ptr) nowait map_entries(%[[VAL_33]] -> %[[VAL_35:.*]], %[[VAL_30]] -> %[[VAL_36:.*]], %[[VAL_32]] -> %[[VAL_37:.*]] : 
!llvm.ptr, !llvm.ptr, !llvm.ptr) private(@boxchar_firstprivate %[[VAL_34]] -> %[[VAL_38:.*]] [map_idx=0], @private_eye %[[VAL_1]] -> %[[VAL_39:.*]] [map_idx=1] : !llvm.struct<(ptr, i64)>, !llvm.ptr) { +// CHECK: omp.target kernel_type(generic) depend(taskdependout -> %[[HEAP0]] : !llvm.ptr) nowait map_entries(%[[VAL_33]] -> %[[VAL_35:.*]], %[[VAL_30]] -> %[[VAL_36:.*]], %[[VAL_32]] -> %[[VAL_37:.*]] : !llvm.ptr, !llvm.ptr, !llvm.ptr) private(@boxchar_firstprivate %[[VAL_34]] -> %[[VAL_38:.*]] [map_idx=0], @private_eye %[[VAL_1]] -> %[[VAL_39:.*]] [map_idx=1] : !llvm.struct<(ptr, i64)>, !llvm.ptr) { // CHECK: omp.terminator // CHECK: } // CHECK: omp.task depend(taskdependin -> %[[HEAP0]] : !llvm.ptr) { diff --git a/mlir/test/Dialect/OpenMP/omp-offload-privatization-prepare.mlir b/mlir/test/Dialect/OpenMP/omp-offload-privatization-prepare.mlir index 0377d4962cba9..99f9e525884b8 100644 --- a/mlir/test/Dialect/OpenMP/omp-offload-privatization-prepare.mlir +++ b/mlir/test/Dialect/OpenMP/omp-offload-privatization-prepare.mlir @@ -76,7 +76,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vec // Test with two firstprivate variables so that we test that even if there are multiple variables to be cleaned up // only one cleanup omp.task is generated. 
- omp.target nowait map_entries(%124 -> %arg2, %160 -> %arg5, %159 -> %arg8, %1601 -> %arg9, %1591 -> %arg10 : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) private(@firstprivatizer %19 -> %arg11 [map_idx=1], @firstprivatizer_1 %20 -> %arg12 [map_idx=3] : !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(generic) nowait map_entries(%124 -> %arg2, %160 -> %arg5, %159 -> %arg8, %1601 -> %arg9, %1591 -> %arg10 : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) private(@firstprivatizer %19 -> %arg11 [map_idx=1], @firstprivatizer_1 %20 -> %arg12 [map_idx=3] : !llvm.ptr, !llvm.ptr) { omp.terminator } %166 = llvm.mlir.constant(48 : i32) : i32 @@ -135,7 +135,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vec // CHECK: %[[VAL_35:.*]] = llvm.getelementptr %[[HEAP1]][0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> // CHECK: %[[VAL_36:.*]] = omp.map.info var_ptr(%[[HEAP1]] : !llvm.ptr, i32) map_clauses({{.*}}to{{.*}}) capture(ByRef) var_ptr_ptr(%[[VAL_35]] : !llvm.ptr) bounds(%[[VAL_29]]) -> !llvm.ptr {name = ""} // CHECK: %[[VAL_37:.*]] = omp.map.info var_ptr(%[[HEAP1]] : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(always,{{.*}}to) capture(ByRef) members(%[[VAL_36]] : [0] : !llvm.ptr) -> !llvm.ptr -// CHECK: omp.target depend(taskdependout -> %[[HEAP0]] : !llvm.ptr) nowait map_entries(%[[VAL_11]] -> %[[VAL_38:.*]], %[[VAL_34]] -> %[[VAL_39:.*]], %[[VAL_33]] -> %[[VAL_40:.*]], %[[VAL_37]] -> %[[VAL_41:.*]], %[[VAL_36]] -> %[[VAL_42:.*]] : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) private(@firstprivatizer %[[HEAP0]] -> %[[VAL_43:.*]] [map_idx=1], @firstprivatizer_1 %[[HEAP1]] -> %[[VAL_44:.*]] [map_idx=3] : !llvm.ptr, !llvm.ptr) { +// CHECK: omp.target kernel_type(generic) depend(taskdependout -> %[[HEAP0]] : !llvm.ptr) nowait map_entries(%[[VAL_11]] -> %[[VAL_38:.*]], %[[VAL_34]] -> %[[VAL_39:.*]], %[[VAL_33]] -> 
%[[VAL_40:.*]], %[[VAL_37]] -> %[[VAL_41:.*]], %[[VAL_36]] -> %[[VAL_42:.*]] : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) private(@firstprivatizer %[[HEAP0]] -> %[[VAL_43:.*]] [map_idx=1], @firstprivatizer_1 %[[HEAP1]] -> %[[VAL_44:.*]] [map_idx=3] : !llvm.ptr, !llvm.ptr) { // CHECK: omp.terminator // CHECK: } // CHECK: omp.task depend(taskdependin -> %[[HEAP0]] : !llvm.ptr) { diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir index 1f9544301b184..bfaac91cc5c34 100644 --- a/mlir/test/Dialect/OpenMP/ops.mlir +++ b/mlir/test/Dialect/OpenMP/ops.mlir @@ -829,36 +829,74 @@ func.func @omp_distribute(%chunk_size : i32, %data_var : memref, %arg0 : i3 } // CHECK-LABEL: omp_target -func.func @omp_target(%if_cond : i1, %device : si32, %num_threads : i32, %device_ptr: memref, %device_addr: memref, %map1: memref, %map2: memref) -> () { +func.func @omp_target(%if_cond : i1, %device : si32, %num_threads : i32, %device_ptr: memref, %device_addr: memref, %map1: memref, %map2: memref, %n: i32) -> () { // Test with optional operands; if_expr, device, thread_limit, private, firstprivate and nowait. - // CHECK: omp.target device({{.*}}) if({{.*}}) nowait thread_limit({{.*}}) + // CHECK: omp.target kernel_type(generic) device({{.*}}) if({{.*}}) nowait thread_limit({{.*}}) "omp.target"(%device, %if_cond, %num_threads) ({ // CHECK: omp.terminator omp.terminator - }) {nowait, operandSegmentSizes = array} : ( si32, i1, i32 ) -> () + }) {kernel_type = #omp, nowait, operandSegmentSizes = array} : ( si32, i1, i32 ) -> () // Test with optional map clause. 
// CHECK: %[[MAP_A:.*]] = omp.map.info var_ptr(%[[VAL_1:.*]] : memref, tensor) map_clauses(always, to) capture(ByRef) -> memref {name = ""} // CHECK: %[[MAP_B:.*]] = omp.map.info var_ptr(%[[VAL_2:.*]] : memref, tensor) map_clauses(tofrom) capture(ByRef) -> memref {name = ""} // CHECK: %[[MAP_C:.*]] = omp.map.info var_ptr(%[[VAL_3:.*]] : memref, tensor) map_clauses(storage) capture(ByRef) -> memref {name = ""} - // CHECK: omp.target is_device_ptr(%[[VAL_4:.*]] : memref) has_device_addr(%[[MAP_A]] -> {{.*}} : memref) map_entries(%[[MAP_B]] -> {{.*}}, %[[MAP_C]] -> {{.*}} : memref, memref) { + // CHECK: omp.target kernel_type(generic) is_device_ptr(%[[VAL_4:.*]] : memref) has_device_addr(%[[MAP_A]] -> {{.*}} : memref) map_entries(%[[MAP_B]] -> {{.*}}, %[[MAP_C]] -> {{.*}} : memref, memref) { %mapv0 = omp.map.info var_ptr(%device_addr : memref, tensor) map_clauses(always, to) capture(ByRef) -> memref {name = ""} %mapv1 = omp.map.info var_ptr(%map1 : memref, tensor) map_clauses(tofrom) capture(ByRef) -> memref {name = ""} %mapv2 = omp.map.info var_ptr(%map2 : memref, tensor) map_clauses(storage) capture(ByRef) -> memref {name = ""} - omp.target is_device_ptr(%device_ptr : memref) has_device_addr(%mapv0 -> %arg0 : memref) map_entries(%mapv1 -> %arg1, %mapv2 -> %arg2 : memref, memref) { + omp.target kernel_type(generic) is_device_ptr(%device_ptr : memref) has_device_addr(%mapv0 -> %arg0 : memref) map_entries(%mapv1 -> %arg1, %mapv2 -> %arg2 : memref, memref) { omp.terminator } // CHECK: %[[MAP_D:.*]] = omp.map.info var_ptr(%[[VAL_1:.*]] : memref, tensor) map_clauses(to) capture(ByRef) -> memref {name = ""} // CHECK: %[[MAP_E:.*]] = omp.map.info var_ptr(%[[VAL_2:.*]] : memref, tensor) map_clauses(always, from) capture(ByRef) -> memref {name = ""} - // CHECK: omp.target map_entries(%[[MAP_D]] -> {{.*}}, %[[MAP_E]] -> {{.*}} : memref, memref) { + // CHECK: omp.target kernel_type(generic) map_entries(%[[MAP_D]] -> {{.*}}, %[[MAP_E]] -> {{.*}} : memref, memref) { %mapv3 = 
omp.map.info var_ptr(%map1 : memref, tensor) map_clauses(to) capture(ByRef) -> memref {name = ""} %mapv4 = omp.map.info var_ptr(%map2 : memref, tensor) map_clauses(always, from) capture(ByRef) -> memref {name = ""} - omp.target map_entries(%mapv3 -> %arg0, %mapv4 -> %arg1 : memref, memref) { + omp.target kernel_type(generic) map_entries(%mapv3 -> %arg0, %mapv4 -> %arg1 : memref, memref) { + omp.terminator + } + + // Test non-generic kernel types. + // CHECK: omp.target kernel_type(bare) { + // CHECK: omp.teams { + omp.target kernel_type(bare) { + omp.teams { + omp.terminator + } + omp.terminator + } + // CHECK: omp.target kernel_type(spmd) host_eval({{.*}}) { + // CHECK: omp.parallel { + // CHECK: omp.wsloop { + // CHECK: omp.loop_nest + omp.target kernel_type(spmd) host_eval(%n -> %arg0 : i32) { + omp.parallel { + omp.wsloop { + omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) { + omp.yield + } + } + omp.terminator + } + omp.terminator + } + // CHECK: omp.target kernel_type(spmd_no_loop) host_eval({{.*}}) { + // CHECK: omp.parallel { + // CHECK: omp.wsloop { + // CHECK: omp.loop_nest + omp.target kernel_type(spmd_no_loop) host_eval(%n -> %arg0 : i32) { + omp.parallel { + omp.wsloop { + omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) { + omp.yield + } + } + omp.terminator + } omp.terminator } - // CHECK: omp.barrier - omp.barrier return } @@ -908,13 +946,13 @@ func.func @omp_target_data (%if_cond : i1, %device : si32, %device_ptr: memref () { - // CHECK: omp.target device({{.*}}) if({{.*}}) - omp.target if(%if_cond) device(%device : si32) { + // CHECK: omp.target kernel_type(generic) device({{.*}}) if({{.*}}) + omp.target kernel_type(generic) if(%if_cond) device(%device : si32) { omp.terminator } - // CHECK: omp.target device({{.*}}) if({{.*}}) nowait - omp.target if(%if_cond) device(%device : si32) thread_limit(%num_threads : i32) nowait { + // CHECK: omp.target kernel_type(generic) device({{.*}}) if({{.*}}) nowait + omp.target 
kernel_type(generic) if(%if_cond) device(%device : si32) thread_limit(%num_threads : i32) nowait { omp.terminator } @@ -1091,7 +1129,7 @@ func.func @parallel_wsloop_reduction(%lb : index, %ub : index, %step : index) { func.func @omp_teams(%lb : i32, %ub : i32, %if_cond : i1, %num_threads : i32, %data_var : memref, %ub64 : i64, %ub16 : i16) -> () { // Test nesting inside of omp.target - omp.target { + omp.target kernel_type(generic) { // CHECK: omp.teams omp.teams { // CHECK: omp.terminator @@ -2238,8 +2276,8 @@ func.func @omp_task_depend(%arg0: memref, %arg1: memref) { // CHECK-LABEL: @omp_target_depend // CHECK-SAME: (%arg0: memref, %arg1: memref) { func.func @omp_target_depend(%arg0: memref, %arg1: memref) { - // CHECK: omp.target depend(taskdependin -> %arg0 : memref, taskdependin -> %arg1 : memref, taskdependinout -> %arg0 : memref) { - omp.target depend(taskdependin -> %arg0 : memref, taskdependin -> %arg1 : memref, taskdependinout -> %arg0 : memref) { + // CHECK: omp.target kernel_type(generic) depend(taskdependin -> %arg0 : memref, taskdependin -> %arg1 : memref, taskdependinout -> %arg0 : memref) { + omp.target kernel_type(generic) depend(taskdependin -> %arg0 : memref, taskdependin -> %arg1 : memref, taskdependinout -> %arg0 : memref) { // CHECK: omp.terminator omp.terminator } {operandSegmentSizes = array} @@ -2837,8 +2875,8 @@ func.func @omp_targets_with_map_bounds(%arg0: !llvm.ptr, %arg1: !llvm.ptr) -> () %10 = omp.map.bounds lower_bound(%7 : i64) upper_bound(%6 : i64) stride(%8 : i64) start_idx(%9 : i64) %mapv2 = omp.map.info var_ptr(%arg1 : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(storage) capture(ByCopy) mapper(@my_mapper) bounds(%10) -> !llvm.ptr {name = ""} - // CHECK: omp.target map_entries(%[[MAP0]] -> {{.*}}, %[[MAP1]] -> {{.*}} : !llvm.ptr, !llvm.ptr) - omp.target map_entries(%mapv1 -> %arg2, %mapv2 -> %arg3 : !llvm.ptr, !llvm.ptr) { + // CHECK: omp.target kernel_type(generic) map_entries(%[[MAP0]] -> {{.*}}, %[[MAP1]] -> {{.*}} : 
!llvm.ptr, !llvm.ptr) + omp.target kernel_type(generic) map_entries(%mapv1 -> %arg2, %mapv2 -> %arg3 : !llvm.ptr, !llvm.ptr) { omp.terminator } @@ -2876,8 +2914,8 @@ func.func @omp_targets_is_allocatable(%arg0: !llvm.ptr, %arg1: !llvm.ptr) -> () %mapv1 = omp.map.info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} // CHECK: %[[MAP1:.*]] = omp.map.info var_ptr(%[[ARG1]] : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(tofrom) capture(ByRef) members(%[[MAP0]] : [0] : !llvm.ptr) -> !llvm.ptr {name = ""} %mapv2 = omp.map.info var_ptr(%arg1 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(tofrom) capture(ByRef) members(%mapv1 : [0] : !llvm.ptr) -> !llvm.ptr {name = ""} - // CHECK: omp.target map_entries(%[[MAP0]] -> {{.*}}, %[[MAP1]] -> {{.*}} : !llvm.ptr, !llvm.ptr) - omp.target map_entries(%mapv1 -> %arg2, %mapv2 -> %arg3 : !llvm.ptr, !llvm.ptr) { + // CHECK: omp.target kernel_type(generic) map_entries(%[[MAP0]] -> {{.*}}, %[[MAP1]] -> {{.*}} : !llvm.ptr, !llvm.ptr) + omp.target kernel_type(generic) map_entries(%mapv1 -> %arg2, %mapv2 -> %arg3 : !llvm.ptr, !llvm.ptr) { omp.terminator } return @@ -2904,8 +2942,8 @@ func.func @omp_target_enter_update_exit_data_depend(%a: memref, %b: memre omp.target_enter_data depend(taskdependin -> %a: memref) nowait map_entries(%map_a, %map_c: memref, memref) // Compute 'b' on the target and copy it back - // CHECK: omp.target map_entries([[MAP1]] -> {{%.*}} : memref) { - omp.target map_entries(%map_b -> %arg0 : memref) { + // CHECK: omp.target kernel_type(generic) map_entries([[MAP1]] -> {{%.*}} : memref) { + omp.target kernel_type(generic) map_entries(%map_b -> %arg0 : memref) { "test.foo"(%arg0) : (memref) -> () omp.terminator } @@ -2921,7 +2959,7 @@ func.func @omp_target_enter_update_exit_data_depend(%a: memref, %b: memre // Compute 'c' on the target and copy it back %map_c_from = omp.map.info var_ptr(%c: memref, tensor) map_clauses(from) 
capture(ByRef) -> memref - omp.target depend(taskdependout -> %c : memref) map_entries(%map_a -> %arg0, %map_c_from -> %arg1 : memref, memref) { + omp.target kernel_type(generic) depend(taskdependout -> %c : memref) map_entries(%map_a -> %arg0, %map_c_from -> %arg1 : memref, memref) { "test.foobar"() : ()->() omp.terminator } @@ -3060,7 +3098,7 @@ func.func @omp_target_private(%map1: memref, %map2: memref, %priv_ // CHECK-SAME: @x.privatizer %{{[^[:space:]]+}} -> %[[PRIV_ARG:[^[:space:]]+]] // CHECK-SAME: : !llvm.ptr // CHECK-SAME: ) - omp.target private(@x.privatizer %priv_var -> %priv_arg : !llvm.ptr) { + omp.target kernel_type(generic) private(@x.privatizer %priv_var -> %priv_arg : !llvm.ptr) { omp.terminator } @@ -3076,7 +3114,7 @@ func.func @omp_target_private(%map1: memref, %map2: memref, %priv_ // CHECK-SAME: @x.privatizer %{{[^[:space:]]+}} -> %[[PRIV_ARG:[^[:space:]]+]] // CHECK-SAME: : !llvm.ptr // CHECK-SAME: ) - omp.target map_entries(%mapv1 -> %arg0, %mapv2 -> %arg1 : memref, memref) private(@x.privatizer %priv_var -> %priv_arg : !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%mapv1 -> %arg0, %mapv2 -> %arg1 : memref, memref) private(@x.privatizer %priv_var -> %priv_arg : !llvm.ptr) { omp.terminator } @@ -3100,7 +3138,7 @@ func.func @omp_target_private_with_map_idx(%map1: memref, %map2: memref %[[PRIV_ARG:[^[:space:]]+]] [map_idx=1] // CHECK-SAME: : !llvm.ptr // CHECK-SAME: ) - omp.target map_entries(%mapv1 -> %arg0, %mapv2 -> %arg1 : memref, memref) private(@x.privatizer %priv_var -> %priv_arg [map_idx=1] : !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%mapv1 -> %arg0, %mapv2 -> %arg1 : memref, memref) private(@x.privatizer %priv_var -> %priv_arg [map_idx=1] : !llvm.ptr) { omp.terminator } @@ -3108,23 +3146,23 @@ func.func @omp_target_private_with_map_idx(%map1: memref, %map2: memref %[[HOST_ARG:.*]] : i32) { + // CHECK: omp.target kernel_type(generic) host_eval(%{{.*}} -> %[[HOST_ARG:.*]] : i32) { // CHECK: omp.teams 
num_teams( to %[[HOST_ARG]] : i32) // CHECK-SAME: thread_limit(%[[HOST_ARG]] : i32) - omp.target host_eval(%x -> %arg0 : i32) { + omp.target kernel_type(generic) host_eval(%x -> %arg0 : i32) { omp.teams num_teams(to %arg0 : i32) thread_limit(%arg0 : i32) { omp.terminator } omp.terminator } - // CHECK: omp.target host_eval(%{{.*}} -> %[[HOST_ARG:.*]] : i32) { + // CHECK: omp.target kernel_type(spmd) host_eval(%{{.*}} -> %[[HOST_ARG:.*]] : i32) { // CHECK: omp.teams { // CHECK: omp.parallel num_threads(%[[HOST_ARG]] : i32) { // CHECK: omp.distribute { // CHECK: omp.wsloop { // CHECK: omp.loop_nest (%{{.*}}) : i32 = (%[[HOST_ARG]]) to (%[[HOST_ARG]]) step (%[[HOST_ARG]]) { - omp.target host_eval(%x -> %arg0 : i32) { + omp.target kernel_type(spmd) host_eval(%x -> %arg0 : i32) { omp.teams { omp.parallel num_threads(%arg0 : i32) { omp.distribute { @@ -3141,15 +3179,14 @@ func.func @omp_target_host_eval(%x : i32) { omp.terminator } - // CHECK: omp.target host_eval(%{{.*}} -> %[[HOST_ARG:.*]] : i32) { + // CHECK: omp.target kernel_type(spmd) host_eval(%{{.*}} -> %[[HOST_ARG:.*]] : i32) { // CHECK: omp.parallel num_threads(%[[HOST_ARG]] : i32) { // CHECK: omp.wsloop { // CHECK: omp.loop_nest - omp.target host_eval(%x -> %arg0 : i32) { - %y = arith.constant 2 : i32 + omp.target kernel_type(spmd) host_eval(%x -> %arg0 : i32) { omp.parallel num_threads(%arg0 : i32) { omp.wsloop { - omp.loop_nest (%iv) : i32 = (%y) to (%y) step (%y) { + omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) { omp.yield } } @@ -3158,11 +3195,20 @@ func.func @omp_target_host_eval(%x : i32) { omp.terminator } - // CHECK: omp.target host_eval(%{{.*}} -> %[[HOST_ARG:.*]] : i32) { + // CHECK: omp.target kernel_type(generic) host_eval(%{{.*}} -> %[[HOST_ARG:.*]] : i32) { + // CHECK: omp.parallel num_threads(%[[HOST_ARG]] : i32) { + omp.target kernel_type(generic) host_eval(%x -> %arg0 : i32) { + omp.parallel num_threads(%arg0 : i32) { + omp.terminator + } + omp.terminator + } + + // CHECK: 
omp.target kernel_type(generic) host_eval(%{{.*}} -> %[[HOST_ARG:.*]] : i32) { // CHECK: omp.teams { // CHECK: omp.distribute { // CHECK: omp.loop_nest (%{{.*}}) : i32 = (%[[HOST_ARG]]) to (%[[HOST_ARG]]) step (%[[HOST_ARG]]) { - omp.target host_eval(%x -> %arg0 : i32) { + omp.target kernel_type(generic) host_eval(%x -> %arg0 : i32) { omp.teams { omp.distribute { omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) { @@ -3174,11 +3220,11 @@ func.func @omp_target_host_eval(%x : i32) { omp.terminator } - // CHECK: omp.target host_eval(%{{.*}} -> %[[HOST_ARG:.*]] : i32) { + // CHECK: omp.target kernel_type(spmd) host_eval(%{{.*}} -> %[[HOST_ARG:.*]] : i32) { // CHECK: omp.teams { // CHECK: omp.loop { // CHECK: omp.loop_nest (%{{.*}}) : i32 = (%[[HOST_ARG]]) to (%[[HOST_ARG]]) step (%[[HOST_ARG]]) { - omp.target host_eval(%x -> %arg0 : i32) { + omp.target kernel_type(spmd) host_eval(%x -> %arg0 : i32) { omp.teams { omp.loop { omp.loop_nest (%iv) : i32 = (%arg0) to (%arg0) step (%arg0) { diff --git a/mlir/test/Dialect/OpenMP/stack-to-shared.mlir b/mlir/test/Dialect/OpenMP/stack-to-shared.mlir index d14528e4f396a..8606395762df3 100644 --- a/mlir/test/Dialect/OpenMP/stack-to-shared.mlir +++ b/mlir/test/Dialect/OpenMP/stack-to-shared.mlir @@ -99,8 +99,8 @@ llvm.func @host_func(%arg0: i64) { omp.parallel { // CHECK: llvm.call @foo(%[[ALLOC0]]) : (!llvm.ptr) -> () llvm.call @foo(%0) : (!llvm.ptr) -> () - // CHECK: omp.target - omp.target { + // CHECK: omp.target kernel_type(generic) + omp.target kernel_type(generic) { %c0 = llvm.mlir.constant(1 : i64) : i64 // CHECK: %[[ALLOC1:.*]] = omp.alloc_shared_mem [[ALLOC1_SIZE:.*]] -> !llvm.ptr %1 = llvm.alloca %c0 x i32 : (i64) -> !llvm.ptr @@ -119,7 +119,7 @@ llvm.func @host_func(%arg0: i64) { llvm.func @target_spmd() { // CHECK-NOT: omp.alloc_shared_mem // CHECK-NOT: omp.free_shared_mem - omp.target { + omp.target kernel_type(spmd) { %c = llvm.mlir.constant(1 : i64) : i64 %0 = llvm.alloca %c x i32 : (i64) -> !llvm.ptr 
omp.teams { diff --git a/mlir/test/Target/LLVMIR/allocatable_gpu_reduction.mlir b/mlir/test/Target/LLVMIR/allocatable_gpu_reduction.mlir index c9ff6de8cc951..812021d081d21 100644 --- a/mlir/test/Target/LLVMIR/allocatable_gpu_reduction.mlir +++ b/mlir/test/Target/LLVMIR/allocatable_gpu_reduction.mlir @@ -48,7 +48,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<"dlti.alloca_memory_space" = 5 : %8 = llvm.getelementptr %5[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> %9 = omp.map.info var_ptr(%5 : !llvm.ptr, f32) map_clauses(implicit, tofrom) capture(ByRef) var_ptr_ptr(%8 : !llvm.ptr) -> !llvm.ptr {name = ""} %10 = omp.map.info var_ptr(%5 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(always, implicit, descriptor, to) capture(ByRef) members(%9 : [0] : !llvm.ptr) -> !llvm.ptr {name = "scalar_alloc"} - omp.target map_entries(%10 -> %arg0 : !llvm.ptr) { + omp.target kernel_type(spmd) map_entries(%10 -> %arg0 : !llvm.ptr) { %13 = llvm.mlir.constant(1000 : i32) : i32 %14 = llvm.mlir.constant(1 : i32) : i32 omp.parallel { diff --git a/mlir/test/Target/LLVMIR/allocatable_gpu_reduction_teams.mlir b/mlir/test/Target/LLVMIR/allocatable_gpu_reduction_teams.mlir index 1c73a49b0bf9f..ab1b675fa84f6 100644 --- a/mlir/test/Target/LLVMIR/allocatable_gpu_reduction_teams.mlir +++ b/mlir/test/Target/LLVMIR/allocatable_gpu_reduction_teams.mlir @@ -46,7 +46,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<"dlti.alloca_memory_space" = 5 : %8 = llvm.getelementptr %5[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> %9 = omp.map.info var_ptr(%5 : !llvm.ptr, f32) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%8 : !llvm.ptr) -> !llvm.ptr {name = ""} %10 = omp.map.info var_ptr(%5 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(always, descriptor, to, attach) capture(ByRef) members(%9 : [0] : !llvm.ptr) -> !llvm.ptr {name = "scalar_alloc"} - omp.target map_entries(%10 -> %arg0 
: !llvm.ptr) { + omp.target kernel_type(spmd) map_entries(%10 -> %arg0 : !llvm.ptr) { %14 = llvm.mlir.constant(1000000 : i32) : i32 %15 = llvm.mlir.constant(1 : i32) : i32 omp.teams reduction(byref @add_reduction_byref_box_heap_f32 %arg0 -> %arg3 : !llvm.ptr) { diff --git a/mlir/test/Target/LLVMIR/omp-target-call-with-repeated-parameter.mlir b/mlir/test/Target/LLVMIR/omp-target-call-with-repeated-parameter.mlir index ebaecc8cf203b..38e84718025a5 100644 --- a/mlir/test/Target/LLVMIR/omp-target-call-with-repeated-parameter.mlir +++ b/mlir/test/Target/LLVMIR/omp-target-call-with-repeated-parameter.mlir @@ -6,7 +6,7 @@ llvm.func @caller_() { %i_host = llvm.alloca %c1 x i32 {bindc_name = "i"} : (i64) -> !llvm.ptr %x_map = omp.map.info var_ptr(%x_host : !llvm.ptr, f32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "x"} %i_map = omp.map.info var_ptr(%i_host : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "i"} - omp.target map_entries(%x_map -> %x_arg, %i_map -> %i_arg : !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%x_map -> %x_arg, %i_map -> %i_arg : !llvm.ptr, !llvm.ptr) { %1 = llvm.load %i_arg : !llvm.ptr -> i32 %2 = llvm.sitofp %1 : i32 to f32 llvm.store %2, %x_arg : f32, !llvm.ptr diff --git a/mlir/test/Target/LLVMIR/omptarget-array-sectioning-host.mlir b/mlir/test/Target/LLVMIR/omptarget-array-sectioning-host.mlir index cbfeb18a04f5b..0d7555a710048 100644 --- a/mlir/test/Target/LLVMIR/omptarget-array-sectioning-host.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-array-sectioning-host.mlir @@ -18,7 +18,7 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a %6 = omp.map.bounds lower_bound(%2 : i64) upper_bound(%2 : i64) stride(%2 : i64) start_idx(%2 : i64) %7 = omp.map.info var_ptr(%0 : !llvm.ptr, !llvm.array<3 x array<3 x array<3 x i32>>>) map_clauses(tofrom) capture(ByRef) bounds(%5, %5, %6) -> !llvm.ptr {name = 
"inarray(1:3,1:3,2:2)"} %8 = omp.map.info var_ptr(%1 : !llvm.ptr, !llvm.array<3 x array<3 x array<3 x i32>>>) map_clauses(tofrom) capture(ByRef) bounds(%5, %5, %5) -> !llvm.ptr {name = "outarray(1:3,1:3,1:3)"} - omp.target map_entries(%7 -> %arg0, %8 -> %arg1 : !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%7 -> %arg0, %8 -> %arg1 : !llvm.ptr, !llvm.ptr) { %9 = llvm.mlir.constant(0 : i64) : i64 %10 = llvm.mlir.constant(1 : i64) : i64 %11 = llvm.getelementptr %arg0[0, %10, %9, %9] : (!llvm.ptr, i64, i64, i64) -> !llvm.ptr, !llvm.array<3 x array<3 x array<3 x i32>>> diff --git a/mlir/test/Target/LLVMIR/omptarget-atomic-capture-control-options.mlir b/mlir/test/Target/LLVMIR/omptarget-atomic-capture-control-options.mlir index 355390719322f..bffcf8b5c4c71 100644 --- a/mlir/test/Target/LLVMIR/omptarget-atomic-capture-control-options.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-atomic-capture-control-options.mlir @@ -23,7 +23,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec : vector<4 %14 = omp.map.info var_ptr(%2 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "threads"} %15 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "capture"} %16 = omp.map.info var_ptr(%8 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "a"} - omp.target map_entries(%14 -> %arg0, %15 -> %arg1, %16 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%14 -> %arg0, %15 -> %arg1, %16 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) { %17 = llvm.mlir.constant(1 : i32) : i32 %18 = llvm.load %arg0 : !llvm.ptr -> i32 omp.parallel num_threads(%18 : i32) { diff --git a/mlir/test/Target/LLVMIR/omptarget-atomic-update-control-options.mlir b/mlir/test/Target/LLVMIR/omptarget-atomic-update-control-options.mlir index 3b0005bd20798..a083981ab4fa7 100644 
--- a/mlir/test/Target/LLVMIR/omptarget-atomic-update-control-options.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-atomic-update-control-options.mlir @@ -18,7 +18,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec : vector<4 llvm.store %6, %5 : i32, !llvm.ptr %10 = omp.map.info var_ptr(%2 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "threads"} %11 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "a"} - omp.target map_entries(%10 -> %arg0, %11 -> %arg1 : !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%10 -> %arg0, %11 -> %arg1 : !llvm.ptr, !llvm.ptr) { %12 = llvm.mlir.constant(1 : i32) : i32 %13 = llvm.load %arg0 : !llvm.ptr -> i32 omp.parallel num_threads(%13 : i32) { diff --git a/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-device.mlir b/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-device.mlir index e9c77ef015336..eb7918740cbef 100644 --- a/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-device.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-device.mlir @@ -6,7 +6,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo %1 = llvm.mlir.addressof @_QFEsp : !llvm.ptr %2 = omp.map.info var_ptr(%1 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "sp"} %3 = omp.map.info var_ptr(%0 : !llvm.ptr, i32) map_clauses(to) capture(ByCopy) -> !llvm.ptr {name = "i"} - omp.target map_entries(%2 -> %arg0, %3 -> %arg1 : !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%2 -> %arg0, %3 -> %arg1 : !llvm.ptr, !llvm.ptr) { %4 = llvm.load %arg1 : !llvm.ptr -> i32 llvm.store %4, %arg0 : i32, !llvm.ptr omp.terminator diff --git a/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-host.mlir b/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-host.mlir index 
871f5caf7b2ff..944ecca6828bd 100644 --- a/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-host.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-byref-bycopy-generation-host.mlir @@ -6,7 +6,7 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a %1 = llvm.mlir.addressof @_QFEsp : !llvm.ptr %2 = omp.map.info var_ptr(%1 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "sp"} %3 = omp.map.info var_ptr(%0 : !llvm.ptr, i32) map_clauses(to) capture(ByCopy) -> !llvm.ptr {name = "i"} - omp.target map_entries(%2 -> %arg0, %3 -> %arg1 : !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%2 -> %arg0, %3 -> %arg1 : !llvm.ptr, !llvm.ptr) { %4 = llvm.load %arg1 : !llvm.ptr -> i32 llvm.store %4, %arg0 : i32, !llvm.ptr omp.terminator diff --git a/mlir/test/Target/LLVMIR/omptarget-constant-alloca-raise.mlir b/mlir/test/Target/LLVMIR/omptarget-constant-alloca-raise.mlir index 3543a23f46d7d..dae2d2daa7091 100644 --- a/mlir/test/Target/LLVMIR/omptarget-constant-alloca-raise.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-constant-alloca-raise.mlir @@ -15,7 +15,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo %1 = llvm.mlir.constant(1 : i64) : i64 %2 = llvm.alloca %1 x !llvm.struct<(ptr)> : (i64) -> !llvm.ptr %3 = omp.map.info var_ptr(%2 : !llvm.ptr, !llvm.struct<(ptr)>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr - omp.target map_entries(%3 -> %arg0 : !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%3 -> %arg0 : !llvm.ptr) { %4 = llvm.mlir.constant(1 : i32) : i32 %5 = llvm.alloca %4 x !llvm.struct<(ptr)> {alignment = 8 : i64} : (i32) -> !llvm.ptr<5> %ascast1 = llvm.addrspacecast %5 : !llvm.ptr<5> to !llvm.ptr diff --git a/mlir/test/Target/LLVMIR/omptarget-constant-indexing-device-region.mlir b/mlir/test/Target/LLVMIR/omptarget-constant-indexing-device-region.mlir index f21ffc45c8bcc..6ea363eccb989 100644 --- 
a/mlir/test/Target/LLVMIR/omptarget-constant-indexing-device-region.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-constant-indexing-device-region.mlir @@ -4,7 +4,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo llvm.func @_QQmain() attributes {bindc_name = "main"} { %0 = llvm.mlir.addressof @_QFEsp : !llvm.ptr %1 = omp.map.info var_ptr(%0 : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "sp"} - omp.target map_entries(%1 -> %arg0 : !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%1 -> %arg0 : !llvm.ptr) { %2 = llvm.mlir.constant(20 : i32) : i32 %3 = llvm.mlir.constant(0 : i64) : i64 %4 = llvm.getelementptr %arg0[0, %3] : (!llvm.ptr, i64) -> !llvm.ptr, !llvm.array<10 x i32> diff --git a/mlir/test/Target/LLVMIR/omptarget-debug-147063.mlir b/mlir/test/Target/LLVMIR/omptarget-debug-147063.mlir index 12d389adbb388..55e5398440391 100644 --- a/mlir/test/Target/LLVMIR/omptarget-debug-147063.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-debug-147063.mlir @@ -9,7 +9,7 @@ module attributes {llvm.target_triple = "x86_64-unknown-linux-gnu", omp.is_gpu = omp.parallel private(@_QFFfnEv_private_i32 %1 -> %arg0 : !llvm.ptr) { llvm.store %2, %arg0 : i32, !llvm.ptr loc(#loc2) %4 = omp.map.info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "v"} loc(#loc2) - omp.target map_entries(%4 -> %arg1 : !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%4 -> %arg1 : !llvm.ptr) { %5 = llvm.mlir.constant(1 : i32) : i32 %6 = llvm.load %arg1 : !llvm.ptr -> i32 loc(#loc3) %7 = llvm.add %6, %5 : i32 loc(#loc3) diff --git a/mlir/test/Target/LLVMIR/omptarget-debug-empty.mlir b/mlir/test/Target/LLVMIR/omptarget-debug-empty.mlir index 45e5d2612e2c2..cc7c8c395116d 100644 --- a/mlir/test/Target/LLVMIR/omptarget-debug-empty.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-debug-empty.mlir @@ -2,7 +2,7 @@ module attributes {omp.is_target_device 
= false} { llvm.func @test() { - omp.target { + omp.target kernel_type(generic) { omp.terminator } loc(#loc4) llvm.return diff --git a/mlir/test/Target/LLVMIR/omptarget-debug-loop-loc.mlir b/mlir/test/Target/LLVMIR/omptarget-debug-loop-loc.mlir index aa4c1f0354fdc..fecc959b59d25 100644 --- a/mlir/test/Target/LLVMIR/omptarget-debug-loop-loc.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-debug-loop-loc.mlir @@ -14,7 +14,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo %7 = omp.map.info var_ptr(%2 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "j"} loc(#loc3) %8 = omp.map.info var_ptr(%5 : !llvm.ptr, !llvm.array<16384 x i32>) map_clauses(implicit, tofrom) capture(ByRef) -> !llvm.ptr {name = "array"} loc(#loc3) %9 = omp.map.info var_ptr(%4 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "i"} loc(#loc3) - omp.target map_entries(%6 -> %arg0, %7 -> %arg2, %8 -> %arg4, %9 -> %arg5 : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(spmd) map_entries(%6 -> %arg0, %7 -> %arg2, %8 -> %arg4, %9 -> %arg5 : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) { %10 = llvm.mlir.constant(1 : i32) : i32 %11 = llvm.mlir.constant(16384 : i32) : i32 omp.teams { diff --git a/mlir/test/Target/LLVMIR/omptarget-debug-map-link-loc.mlir b/mlir/test/Target/LLVMIR/omptarget-debug-map-link-loc.mlir index 492610251769c..d8181b0c3fb17 100644 --- a/mlir/test/Target/LLVMIR/omptarget-debug-map-link-loc.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-debug-map-link-loc.mlir @@ -12,7 +12,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo %6 = llvm.mlir.addressof @_QMtest_0Esp : !llvm.ptr loc(#loc1) %7 = omp.map.info var_ptr(%2 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr loc(#loc3) %8 = omp.map.info var_ptr(%6 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr loc(#loc3) - 
omp.target map_entries(%7 -> %arg0, %8 -> %arg1 : !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%7 -> %arg0, %8 -> %arg1 : !llvm.ptr, !llvm.ptr) { %16 = llvm.load %arg1 : !llvm.ptr -> i32 loc(#loc5) llvm.store %16, %arg0 : i32, !llvm.ptr loc(#loc5) omp.terminator loc(#loc5) diff --git a/mlir/test/Target/LLVMIR/omptarget-debug-nowait.mlir b/mlir/test/Target/LLVMIR/omptarget-debug-nowait.mlir index 3bd724f42e8ce..da33e5f4838d4 100644 --- a/mlir/test/Target/LLVMIR/omptarget-debug-nowait.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-debug-nowait.mlir @@ -7,7 +7,7 @@ module attributes {omp.is_target_device = false} { %3 = llvm.alloca %0 x i32 : (i64) -> !llvm.ptr %6 = omp.map.info var_ptr(%1 : !llvm.ptr, f32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr %7 = omp.map.info var_ptr(%3 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr - omp.target nowait map_entries(%6 -> %arg0, %7 -> %arg1 : !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(generic) nowait map_entries(%6 -> %arg0, %7 -> %arg1 : !llvm.ptr, !llvm.ptr) { %8 = llvm.mlir.constant(0 : i64) : i64 %9 = llvm.mlir.constant(100 : i32) : i32 llvm.br ^bb1(%9, %8 : i32, i64) diff --git a/mlir/test/Target/LLVMIR/omptarget-debug-var-1.mlir b/mlir/test/Target/LLVMIR/omptarget-debug-var-1.mlir index f5ed9646cf33c..4c79f395bffcd 100644 --- a/mlir/test/Target/LLVMIR/omptarget-debug-var-1.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-debug-var-1.mlir @@ -39,7 +39,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo %4 = omp.map.info var_ptr(%ascast : !llvm.ptr, f32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr %5 = omp.map.info var_ptr(%3 : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr %6 = omp.map.info var_ptr(%ascast2 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr - omp.target map_entries(%4 -> %arg0, %5 -> %arg1, %6 -> %arg2 : 
!llvm.ptr, !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%4 -> %arg0, %5 -> %arg1, %6 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) { llvm.intr.dbg.declare #var_x = %arg0 : !llvm.ptr llvm.intr.dbg.declare #var_arr = %arg1 : !llvm.ptr llvm.intr.dbg.declare #var_i = %arg2 : !llvm.ptr diff --git a/mlir/test/Target/LLVMIR/omptarget-debug-var-2.mlir b/mlir/test/Target/LLVMIR/omptarget-debug-var-2.mlir index 11a07dfd9a180..2c78ae4c33f75 100644 --- a/mlir/test/Target/LLVMIR/omptarget-debug-var-2.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-debug-var-2.mlir @@ -42,7 +42,7 @@ module attributes {omp.is_target_device = false} { %15 = omp.map.bounds lower_bound(%7 : i64) upper_bound(%6 : i64) extent(%10 : i64) stride(%8 : i64) start_idx(%8 : i64) %16 = omp.map.info var_ptr(%11 : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(tofrom) capture(ByRef) bounds(%15) -> !llvm.ptr %17 = omp.map.info var_ptr(%4 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr - omp.target map_entries(%14 -> %arg0, %16 -> %arg1, %17 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%14 -> %arg0, %16 -> %arg1, %17 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) { llvm.intr.dbg.declare #var_x = %arg0 : !llvm.ptr llvm.intr.dbg.declare #var_arr = %arg1 : !llvm.ptr llvm.intr.dbg.declare #var_i = %arg2 : !llvm.ptr diff --git a/mlir/test/Target/LLVMIR/omptarget-debug.mlir b/mlir/test/Target/LLVMIR/omptarget-debug.mlir index ab687f198b9b4..761f6af3989c5 100644 --- a/mlir/test/Target/LLVMIR/omptarget-debug.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-debug.mlir @@ -6,7 +6,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr<5> %ascast = llvm.addrspacecast %1 : !llvm.ptr<5> to !llvm.ptr %9 = omp.map.info var_ptr(%ascast : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} - omp.target map_entries(%9 -> 
%arg0 : !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%9 -> %arg0 : !llvm.ptr) { %13 = llvm.mlir.constant(1 : i32) : i32 llvm.store %13, %arg0 : i32, !llvm.ptr loc(#loc2) omp.terminator diff --git a/mlir/test/Target/LLVMIR/omptarget-debug2.mlir b/mlir/test/Target/LLVMIR/omptarget-debug2.mlir index 6cf75af38f916..7ed003ef506e3 100644 --- a/mlir/test/Target/LLVMIR/omptarget-debug2.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-debug2.mlir @@ -7,7 +7,7 @@ module attributes {omp.is_target_device = false} { %0 = llvm.mlir.constant(1 : i32) : i32 %1 = llvm.alloca %0 x i32 : (i32) -> !llvm.ptr %9 = omp.map.info var_ptr(%1 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} - omp.target map_entries(%9 -> %arg0 : !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%9 -> %arg0 : !llvm.ptr) { %13 = llvm.mlir.constant(1 : i32) : i32 llvm.store %13, %arg0 : i32, !llvm.ptr loc(#loc2) omp.terminator diff --git a/mlir/test/Target/LLVMIR/omptarget-declare-target-llvm-device.mlir b/mlir/test/Target/LLVMIR/omptarget-declare-target-llvm-device.mlir index d37b21f20153b..bc59fdda2cc25 100644 --- a/mlir/test/Target/LLVMIR/omptarget-declare-target-llvm-device.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-declare-target-llvm-device.mlir @@ -22,7 +22,7 @@ module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_devic // CHECK-DAG: store i32 1, ptr %[[V]], align 4 // CHECK-DAG: br label %omp.region.cont %map = omp.map.info var_ptr(%0 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} - omp.target map_entries(%map -> %arg0 : !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%map -> %arg0 : !llvm.ptr) { %1 = llvm.mlir.constant(1 : i32) : i32 llvm.store %1, %arg0 : i32, !llvm.ptr omp.terminator diff --git a/mlir/test/Target/LLVMIR/omptarget-declare-target-to-device.mlir b/mlir/test/Target/LLVMIR/omptarget-declare-target-to-device.mlir index 143d605658294..fd4c024d5cdae 100644 --- 
a/mlir/test/Target/LLVMIR/omptarget-declare-target-to-device.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-declare-target-to-device.mlir @@ -19,7 +19,7 @@ module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, llvm.func @_QQmain() { %0 = llvm.mlir.addressof @_QMtest_0Ezii : !llvm.ptr %1 = omp.map.info var_ptr(%0 : !llvm.ptr, !llvm.array<11 x f32>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr - omp.target map_entries(%1 -> %arg0 : !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%1 -> %arg0 : !llvm.ptr) { %2 = llvm.mlir.constant(1.0 : f32) : f32 %3 = llvm.mlir.constant(0 : i64) : i64 %4 = llvm.getelementptr %arg0[%3] : (!llvm.ptr, i64) -> !llvm.ptr, f32 diff --git a/mlir/test/Target/LLVMIR/omptarget-declare-target-to-host.mlir b/mlir/test/Target/LLVMIR/omptarget-declare-target-to-host.mlir index 4202421aed5ac..fc60008a76c3d 100644 --- a/mlir/test/Target/LLVMIR/omptarget-declare-target-to-host.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-declare-target-to-host.mlir @@ -21,7 +21,7 @@ module attributes {llvm.target_triple = "x86_64-unknown-linux-gnu", omp.is_gpu = %3 = llvm.mlir.addressof @_QMtest_0Ezii : !llvm.ptr %4 = omp.map.bounds lower_bound(%1 : i64) upper_bound(%2 : i64) extent(%2 : i64) stride(%0 : i64) start_idx(%1 : i64) {stride_in_bytes = true} %5 = omp.map.info var_ptr(%3 : !llvm.ptr, !llvm.array<11 x f32>) map_clauses(tofrom) capture(ByRef) bounds(%4) -> !llvm.ptr - omp.target map_entries(%5 -> %arg0 : !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%5 -> %arg0 : !llvm.ptr) { %6 = llvm.mlir.constant(1.0 : f32) : f32 %7 = llvm.mlir.constant(0 : i64) : i64 %8 = llvm.getelementptr %arg0[%7] : (!llvm.ptr, i64) -> !llvm.ptr, f32 diff --git a/mlir/test/Target/LLVMIR/omptarget-depend-host-only.mlir b/mlir/test/Target/LLVMIR/omptarget-depend-host-only.mlir index ece32bb5419c6..06b2399b71e94 100644 --- a/mlir/test/Target/LLVMIR/omptarget-depend-host-only.mlir +++ 
b/mlir/test/Target/LLVMIR/omptarget-depend-host-only.mlir @@ -8,7 +8,7 @@ module attributes {omp.is_target_device = false} { %3 = omp.map.bounds lower_bound(%1 : i64) upper_bound(%0 : i64) extent(%2 : i64) stride(%1 : i64) start_idx(%1 : i64) %4 = llvm.mlir.addressof @_QFEa : !llvm.ptr %5 = omp.map.info var_ptr(%4 : !llvm.ptr, !llvm.array<40 x i32>) map_clauses(from) capture(ByRef) bounds(%3) -> !llvm.ptr {name = "a"} - omp.target depend(taskdependin -> %4 : !llvm.ptr) map_entries(%5 -> %arg0 : !llvm.ptr) { + omp.target kernel_type(generic) depend(taskdependin -> %4 : !llvm.ptr) map_entries(%5 -> %arg0 : !llvm.ptr) { %6 = llvm.mlir.constant(100 : index) : i32 llvm.store %6, %arg0 : i32, !llvm.ptr omp.terminator diff --git a/mlir/test/Target/LLVMIR/omptarget-depend.mlir b/mlir/test/Target/LLVMIR/omptarget-depend.mlir index 0f2437639319a..5ada9c0382aeb 100644 --- a/mlir/test/Target/LLVMIR/omptarget-depend.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-depend.mlir @@ -47,7 +47,7 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a %11 = omp.map.info var_ptr(%5 : !llvm.ptr, !llvm.array<40 x i32>) map_clauses(from) capture(ByRef) bounds(%9) -> !llvm.ptr {name = "b"} %12 = omp.map.info var_ptr(%7 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "i"} %13 = omp.map.info var_ptr(%8 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "n"} - omp.target depend(taskdependin -> %4 : !llvm.ptr) map_entries(%10 -> %arg0, %11 -> %arg1, %12 -> %arg2, %13 -> %arg3 : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(generic) depend(taskdependin -> %4 : !llvm.ptr) map_entries(%10 -> %arg0, %11 -> %arg1, %12 -> %arg2, %13 -> %arg3 : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) { %14 = llvm.mlir.constant(0 : index) : i64 %15 = llvm.mlir.constant(10 : i32) : i32 %16 = llvm.mlir.constant(1 : index) : i64 diff --git 
a/mlir/test/Target/LLVMIR/omptarget-device-shared-memory.mlir b/mlir/test/Target/LLVMIR/omptarget-device-shared-memory.mlir index 9f57255d564b3..12c6673c4d084 100644 --- a/mlir/test/Target/LLVMIR/omptarget-device-shared-memory.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-device-shared-memory.mlir @@ -25,7 +25,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo %7 = omp.map.info var_ptr(%2 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "x"} %8 = omp.map.info var_ptr(%4 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "y"} %9 = omp.map.info var_ptr(%6 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "z"} - omp.target map_entries(%7 -> %arg0, %8 -> %arg1, %9 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%7 -> %arg0, %8 -> %arg1, %9 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) { %11 = llvm.mlir.constant(10000 : i32) : i32 %12 = llvm.mlir.constant(1 : i32) : i32 omp.teams reduction(@reduction %arg0 -> %arg3 : !llvm.ptr) { diff --git a/mlir/test/Target/LLVMIR/omptarget-device.mlir b/mlir/test/Target/LLVMIR/omptarget-device.mlir index ce82f55561714..d060e512c4cd9 100644 --- a/mlir/test/Target/LLVMIR/omptarget-device.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-device.mlir @@ -6,39 +6,39 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["nvptx64- // Constant i16 -> i64 in the runtime call. %c1_i16 = llvm.mlir.constant(1 : i16) : i16 - omp.target device(%c1_i16 : i16) + omp.target kernel_type(generic) device(%c1_i16 : i16) host_eval(%x -> %lb, %x -> %ub, %x -> %step : i32, i32, i32) { omp.terminator } // Constant i32 -> i64 in the runtime call. 
%c2_i32 = llvm.mlir.constant(2 : i32) : i32 - omp.target device(%c2_i32 : i32) + omp.target kernel_type(generic) device(%c2_i32 : i32) host_eval(%x -> %lb, %x -> %ub, %x -> %step : i32, i32, i32) { omp.terminator } // Constant i64 stays i64 in the runtime call. %c3_i64 = llvm.mlir.constant(3 : i64) : i64 - omp.target device(%c3_i64 : i64) + omp.target kernel_type(generic) device(%c3_i64 : i64) host_eval(%x -> %lb, %x -> %ub, %x -> %step : i32, i32, i32) { omp.terminator } // Variable i16 -> cast to i64. - omp.target device(%d16 : i16) + omp.target kernel_type(generic) device(%d16 : i16) host_eval(%x -> %lb, %x -> %ub, %x -> %step : i32, i32, i32) { omp.terminator } // Variable i32 -> cast to i64. - omp.target device(%d32 : i32) + omp.target kernel_type(generic) device(%d32 : i32) host_eval(%x -> %lb, %x -> %ub, %x -> %step : i32, i32, i32) { omp.terminator } // Variable i64 stays i64. - omp.target device(%d64 : i64) + omp.target kernel_type(generic) device(%d64 : i64) host_eval(%x -> %lb, %x -> %ub, %x -> %step : i32, i32, i32) { omp.terminator } diff --git a/mlir/test/Target/LLVMIR/omptarget-fortran-common-block-host.mlir b/mlir/test/Target/LLVMIR/omptarget-fortran-common-block-host.mlir index 62b8fe76680cc..a195bb49bff5b 100644 --- a/mlir/test/Target/LLVMIR/omptarget-fortran-common-block-host.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-fortran-common-block-host.mlir @@ -14,7 +14,7 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a %4 = llvm.getelementptr %2[%0] : (!llvm.ptr, i64) -> !llvm.ptr, i8 %5 = omp.map.info var_ptr(%3 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "var1"} %6 = omp.map.info var_ptr(%4 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "var2"} - omp.target map_entries(%5 -> %arg0, %6 -> %arg1 : !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%5 -> %arg0, %6 -> %arg1 : !llvm.ptr, !llvm.ptr) { omp.terminator } llvm.return @@ -23,7 +23,7 @@ 
module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a llvm.func @omp_map_common_block_using_common_block_symbol() { %0 = llvm.mlir.addressof @var_common_ : !llvm.ptr %1 = omp.map.info var_ptr(%0 : !llvm.ptr, !llvm.array<8 x i8>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "var_common"} - omp.target map_entries(%1 -> %arg0 : !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%1 -> %arg0 : !llvm.ptr) { omp.terminator } llvm.return diff --git a/mlir/test/Target/LLVMIR/omptarget-if-nowait.mlir b/mlir/test/Target/LLVMIR/omptarget-if-nowait.mlir index 6f8d938a4d5f2..a3003282e3365 100644 --- a/mlir/test/Target/LLVMIR/omptarget-if-nowait.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-if-nowait.mlir @@ -10,7 +10,7 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a %9 = omp.map.info var_ptr(%3 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "cond"} %10 = omp.map.info var_ptr(%arg0 : !llvm.ptr, f32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "var"} %11 = omp.map.info var_ptr(%arg1 : !llvm.ptr, f32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "val"} - omp.target if(%8) nowait map_entries(%10 -> %arg3, %11 -> %arg4 : !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(generic) if(%8) nowait map_entries(%10 -> %arg3, %11 -> %arg4 : !llvm.ptr, !llvm.ptr) { %12 = llvm.load %arg4 : !llvm.ptr -> f32 llvm.store %12, %arg3 : f32, !llvm.ptr omp.terminator diff --git a/mlir/test/Target/LLVMIR/omptarget-if.mlir b/mlir/test/Target/LLVMIR/omptarget-if.mlir index 706ad4411438b..c9e180d2af871 100644 --- a/mlir/test/Target/LLVMIR/omptarget-if.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-if.mlir @@ -2,7 +2,7 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-amd-amdhsa"]} { llvm.func @target_if_variable(%x : i1) { - omp.target if(%x) { + 
omp.target kernel_type(generic) if(%x) { omp.terminator } llvm.return @@ -31,7 +31,7 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a llvm.func @target_if_true() { %0 = llvm.mlir.constant(true) : i1 - omp.target if(%0) { + omp.target kernel_type(generic) if(%0) { omp.terminator } llvm.return @@ -53,7 +53,7 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a llvm.func @target_if_false() { %0 = llvm.mlir.constant(false) : i1 - omp.target if(%0) { + omp.target kernel_type(generic) if(%0) { omp.terminator } llvm.return diff --git a/mlir/test/Target/LLVMIR/omptarget-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-llvm.mlir index 0548c6a178d0c..17d18773c48eb 100644 --- a/mlir/test/Target/LLVMIR/omptarget-llvm.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-llvm.mlir @@ -628,7 +628,7 @@ module attributes {omp.target_triples = ["amdgcn-amd-amdhsa"]} { llvm.func @_QPomp_target_is_device_ptr(%arg0 : !llvm.ptr) { %map = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.ptr) map_clauses(is_device_ptr) capture(ByRef) -> !llvm.ptr {name = ""} - omp.target map_entries(%map -> %ptr_arg : !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%map -> %ptr_arg : !llvm.ptr) { omp.terminator } llvm.return diff --git a/mlir/test/Target/LLVMIR/omptarget-mapper-combined-entry.mlir b/mlir/test/Target/LLVMIR/omptarget-mapper-combined-entry.mlir index 1cf16183b943b..8b84c0e37199c 100644 --- a/mlir/test/Target/LLVMIR/omptarget-mapper-combined-entry.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-mapper-combined-entry.mlir @@ -25,7 +25,7 @@ module attributes {omp.target_triples = ["amdgcn-amd-amdhsa"]} { %map_parent = omp.map.info var_ptr(%s : !llvm.ptr, !llvm.struct<"S", (i32, i32)>) map_clauses(tofrom) capture(ByRef) mapper(@mapper) members(%map_field0 : [0] : !llvm.ptr) -> !llvm.ptr {name = "s"} - omp.target map_entries(%map_parent -> %arg0, %map_field0 -> %arg1 : !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(generic) 
map_entries(%map_parent -> %arg0, %map_field0 -> %arg1 : !llvm.ptr, !llvm.ptr) { omp.terminator } llvm.return diff --git a/mlir/test/Target/LLVMIR/omptarget-memcpy-align-metadata.mlir b/mlir/test/Target/LLVMIR/omptarget-memcpy-align-metadata.mlir index ce9a4dcbd55be..f593048ffceae 100644 --- a/mlir/test/Target/LLVMIR/omptarget-memcpy-align-metadata.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-memcpy-align-metadata.mlir @@ -21,7 +21,7 @@ module attributes {llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:6 %11 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "b"} %12 = omp.map.info var_ptr(%2 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "k"} %13 = omp.map.info var_ptr(%7 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "nz"} - omp.target map_entries(%10 -> %arg0, %11 -> %arg1, %12 -> %arg2, %13 -> %arg3, %9 -> %arg4 : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(spmd) map_entries(%10 -> %arg0, %11 -> %arg1, %12 -> %arg2, %13 -> %arg3, %9 -> %arg4 : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) { %14 = llvm.mlir.constant(0 : index) : i64 %15 = llvm.mlir.constant(13 : i32) : i32 %16 = llvm.mlir.constant(1000 : i32) : i32 diff --git a/mlir/test/Target/LLVMIR/omptarget-multi-block-reduction.mlir b/mlir/test/Target/LLVMIR/omptarget-multi-block-reduction.mlir index fac61e05f097f..bc1022c27f3ae 100644 --- a/mlir/test/Target/LLVMIR/omptarget-multi-block-reduction.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-multi-block-reduction.mlir @@ -34,7 +34,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<"dlti.alloca_memory_space" = 5 : %10 = llvm.alloca %c1 x !llvm.array<5 x f32> {bindc_name = "x"} : (i64) -> !llvm.ptr<5> %11 = llvm.addrspacecast %10 : !llvm.ptr<5> to !llvm.ptr %74 = omp.map.info var_ptr(%11 : !llvm.ptr, !llvm.array<5 x f32>) 
map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "x"} - omp.target map_entries(%74 -> %arg0 : !llvm.ptr) { + omp.target kernel_type(spmd) map_entries(%74 -> %arg0 : !llvm.ptr) { %c1_2 = llvm.mlir.constant(1 : i32) : i32 %c10 = llvm.mlir.constant(10 : i32) : i32 omp.teams reduction(byref @add_reduction_byref_box_5xf32 %arg0 -> %arg2 : !llvm.ptr) { diff --git a/mlir/test/Target/LLVMIR/omptarget-multi-reduction.mlir b/mlir/test/Target/LLVMIR/omptarget-multi-reduction.mlir index 8950db3fc48aa..bd17f68e00e35 100644 --- a/mlir/test/Target/LLVMIR/omptarget-multi-reduction.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-multi-reduction.mlir @@ -63,7 +63,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo %32 = omp.map.info var_ptr(%14 : !llvm.ptr, f32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "ce3"} %33 = omp.map.info var_ptr(%11 : !llvm.ptr, f32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "ce4"} %34 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "j"} - omp.target map_entries(%30 -> %arg0, %31 -> %arg1, %32 -> %arg2, %33 -> %arg3, %34 -> %arg4 : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(spmd) map_entries(%30 -> %arg0, %31 -> %arg1, %32 -> %arg2, %33 -> %arg3, %34 -> %arg4 : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) { %35 = llvm.mlir.constant(1.000000e+00 : f32) : f32 %36 = llvm.mlir.constant(1.000000e+00 : f64) : f64 %37 = llvm.mlir.constant(1000 : i32) : i32 diff --git a/mlir/test/Target/LLVMIR/omptarget-nested-ptr-record-type-mapping-host.mlir b/mlir/test/Target/LLVMIR/omptarget-nested-ptr-record-type-mapping-host.mlir index 82c3dd829d13e..7e8ba4ec54a3a 100644 --- a/mlir/test/Target/LLVMIR/omptarget-nested-ptr-record-type-mapping-host.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-nested-ptr-record-type-mapping-host.mlir @@ -20,7 +20,7 @@ module attributes 
{omp.is_target_device = false, omp.target_triples = ["amdgcn-a %9 = omp.map.info var_ptr(%7 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%8 : !llvm.ptr) bounds(%5) -> !llvm.ptr {name = ""} %10 = omp.map.info var_ptr(%7 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "one_l%nest%array_k"} %11 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(f32, struct<(ptr, i64, i32, i8, i8, i8, i8)>, array<10 x i32>, f32, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32, struct<(f32, array<10 x i32>, struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>, i32)>)>) map_clauses(tofrom) capture(ByRef) members(%10, %9 : [6,2], [6,2,0] : !llvm.ptr, !llvm.ptr) -> !llvm.ptr {name = "one_l", partial_map = true} - omp.target map_entries(%10 -> %arg1, %9 -> %arg2, %11 -> %arg3 : !llvm.ptr, !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%10 -> %arg1, %9 -> %arg2, %11 -> %arg3 : !llvm.ptr, !llvm.ptr, !llvm.ptr) { omp.terminator } llvm.return diff --git a/mlir/test/Target/LLVMIR/omptarget-nested-record-type-mapping-host.mlir b/mlir/test/Target/LLVMIR/omptarget-nested-record-type-mapping-host.mlir index a49ed19bc6bc0..ac0d0952b8424 100644 --- a/mlir/test/Target/LLVMIR/omptarget-nested-record-type-mapping-host.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-nested-record-type-mapping-host.mlir @@ -22,7 +22,7 @@ llvm.func @_QQmain() { %10 = omp.map.bounds lower_bound(%2 : i64) upper_bound(%1 : i64) extent(%0 : i64) stride(%2 : i64) start_idx(%2 : i64) %11 = omp.map.info var_ptr(%9 : !llvm.ptr, !llvm.array<10 x i32>) map_clauses(tofrom) capture(ByRef) bounds(%10) -> !llvm.ptr %12 = omp.map.info var_ptr(%4 : !llvm.ptr, !llvm.struct<(f32, array<10 x i32>, struct<(f32, i32)>, i32)>) map_clauses(tofrom) capture(ByRef) members(%6, %8, %11 : [3], [2, 1], [1] : !llvm.ptr, !llvm.ptr, !llvm.ptr) -> !llvm.ptr {partial_map = true} - 
omp.target map_entries(%6 -> %arg0, %8 -> %arg1, %11 -> %arg2, %12 -> %arg3 : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%6 -> %arg0, %8 -> %arg1, %11 -> %arg2, %12 -> %arg3 : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) { omp.terminator } llvm.return diff --git a/mlir/test/Target/LLVMIR/omptarget-nowait-host-only.mlir b/mlir/test/Target/LLVMIR/omptarget-nowait-host-only.mlir index 94d8d052d087e..0369bd7e9004d 100644 --- a/mlir/test/Target/LLVMIR/omptarget-nowait-host-only.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-nowait-host-only.mlir @@ -8,7 +8,7 @@ module attributes {omp.is_target_device = false} { %0 = llvm.mlir.constant(1 : i64) : i64 %1 = llvm.alloca %0 x f32 {bindc_name = "x"} : (i64) -> !llvm.ptr %3 = omp.map.info var_ptr(%1 : !llvm.ptr, f32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "x"} - omp.target nowait map_entries(%3 -> %arg0 : !llvm.ptr) { + omp.target kernel_type(generic) nowait map_entries(%3 -> %arg0 : !llvm.ptr) { %4 = llvm.mlir.constant(5.000000e+00 : f32) : f32 llvm.store %4, %arg0 : f32, !llvm.ptr omp.terminator diff --git a/mlir/test/Target/LLVMIR/omptarget-nowait-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-nowait-llvm.mlir index 5eee7b7d7d976..74f05fdab2a6c 100644 --- a/mlir/test/Target/LLVMIR/omptarget-nowait-llvm.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-nowait-llvm.mlir @@ -6,7 +6,7 @@ module attributes {omp.target_triples = ["dummy-target-triple"]} { %0 = llvm.mlir.constant(1 : i64) : i64 %1 = llvm.alloca %0 x i32 : (i64) -> !llvm.ptr %2 = omp.map.info var_ptr(%1 : !llvm.ptr, i32) map_clauses(implicit) capture(ByCopy) -> !llvm.ptr - omp.target nowait map_entries(%2 -> %arg0 : !llvm.ptr) { + omp.target kernel_type(generic) nowait map_entries(%2 -> %arg0 : !llvm.ptr) { %3 = llvm.mlir.constant(2 : i32) : i32 llvm.store %3, %arg0 : i32, !llvm.ptr omp.terminator diff --git a/mlir/test/Target/LLVMIR/omptarget-nowait.mlir b/mlir/test/Target/LLVMIR/omptarget-nowait.mlir index 
a96756f468df3..67d683318412d 100644 --- a/mlir/test/Target/LLVMIR/omptarget-nowait.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-nowait.mlir @@ -8,7 +8,7 @@ module attributes {omp.target_triples = ["amdgcn-amd-amdhsa"]} { %3 = omp.map.info var_ptr(%arg0 : !llvm.ptr, f64) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%2 : !llvm.ptr) -> !llvm.ptr {name = ""} %4 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(to) capture(ByRef) members(%3 : [0] : !llvm.ptr) -> !llvm.ptr {name = "a"} %5 = omp.map.info var_ptr(%1 : !llvm.ptr, f64) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "n"} - omp.target nowait map_entries(%4 -> %arg1, %5 -> %arg2, %3 -> %arg3 : !llvm.ptr, !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(generic) nowait map_entries(%4 -> %arg1, %5 -> %arg2, %3 -> %arg3 : !llvm.ptr, !llvm.ptr, !llvm.ptr) { %two_f = llvm.mlir.constant(2.000000e+00 : f64) : f64 %one_i = llvm.mlir.constant(1 : index) : i64 %6 = llvm.getelementptr %arg1[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> diff --git a/mlir/test/Target/LLVMIR/omptarget-nullary-record-ptr-member-map.mlir b/mlir/test/Target/LLVMIR/omptarget-nullary-record-ptr-member-map.mlir index f4423294eb678..3d5d4b5c84c88 100644 --- a/mlir/test/Target/LLVMIR/omptarget-nullary-record-ptr-member-map.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-nullary-record-ptr-member-map.mlir @@ -9,7 +9,7 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a llvm.func @test_select_gen(%arg0: !llvm.ptr, %arg1: !llvm.ptr) { %0 = omp.map.info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%arg1 : !llvm.ptr) -> !llvm.ptr %1 = omp.map.info var_ptr(%arg0 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(to) capture(ByRef) members(%0 : [0] : !llvm.ptr) -> !llvm.ptr - omp.target 
map_entries(%0 -> %arg2, %1 -> %arg3 : !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%0 -> %arg2, %1 -> %arg3 : !llvm.ptr, !llvm.ptr) { omp.terminator } llvm.return diff --git a/mlir/test/Target/LLVMIR/omptarget-overlapping-record-member-map.mlir b/mlir/test/Target/LLVMIR/omptarget-overlapping-record-member-map.mlir index 7e8f760cf3ee1..16a0ed479664a 100644 --- a/mlir/test/Target/LLVMIR/omptarget-overlapping-record-member-map.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-overlapping-record-member-map.mlir @@ -7,7 +7,7 @@ module attributes {llvm.target_triple = "x86_64-unknown-linux-gnu", omp.is_gpu = %2 = llvm.getelementptr %1[0, 1] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<"_QFTdtype", (f32, i32)> %3 = omp.map.info var_ptr(%2 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "dtypev%value2"} %4 = omp.map.info var_ptr(%1 : !llvm.ptr, !llvm.struct<"_QFTdtype", (f32, i32)>) map_clauses(to) capture(ByRef) members(%3 : [1] : !llvm.ptr) -> !llvm.ptr {name = "dtypev"} - omp.target map_entries(%4 -> %arg0, %3 -> %arg1 : !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%4 -> %arg0, %3 -> %arg1 : !llvm.ptr, !llvm.ptr) { omp.terminator } llvm.return diff --git a/mlir/test/Target/LLVMIR/omptarget-parallel-llvm-debug.mlir b/mlir/test/Target/LLVMIR/omptarget-parallel-llvm-debug.mlir index b18338ea35cc3..86fe7b87e6404 100644 --- a/mlir/test/Target/LLVMIR/omptarget-parallel-llvm-debug.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-parallel-llvm-debug.mlir @@ -16,7 +16,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec : vector<2xi64>, %1 = llvm.alloca %0 x i32 {bindc_name = "x"} : (i64) -> !llvm.ptr llvm.intr.dbg.declare #var_x = %1 : !llvm.ptr loc(#loc2) %5 = omp.map.info var_ptr(%1 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "x"} - omp.target map_entries(%5 -> %arg0 : !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%5 -> %arg0 : !llvm.ptr) 
{ %6 = llvm.mlir.constant(1 : i32) : i32 llvm.intr.dbg.declare #var_x1 = %arg0 : !llvm.ptr loc(#loc3) omp.parallel { diff --git a/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir index c1016775270a6..1262657d3a0c4 100644 --- a/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-parallel-llvm.mlir @@ -6,7 +6,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9", llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_gpu = true, omp.is_target_device = true} { llvm.func @_QQmain_omp_outline_1(%arg0: !llvm.ptr) attributes {omp.declare_target = #omp.declaretarget} { %0 = omp.map.info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = "d"} - omp.target map_entries(%0 -> %arg2 : !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%0 -> %arg2 : !llvm.ptr) { omp.parallel { %1 = llvm.mlir.constant(1 : i32) : i32 llvm.store %1, %arg2 : i32, !llvm.ptr @@ -19,7 +19,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo llvm.func @_test_num_threads(%arg0: !llvm.ptr) attributes {omp.declare_target = #omp.declaretarget} { %0 = omp.map.info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = "d"} - omp.target map_entries(%0 -> %arg2 : !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%0 -> %arg2 : !llvm.ptr) { %1 = llvm.mlir.constant(156 : i32) : i32 omp.parallel num_threads(%1 : i32) { %2 = llvm.mlir.constant(1 : i32) : i32 @@ -37,7 +37,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo %cast = llvm.addrspacecast %1 : !llvm.ptr<5> to !llvm.ptr %2 = 
omp.map.info var_ptr(%cast : !llvm.ptr, i32) map_clauses(from) capture(ByRef) -> !llvm.ptr {name = "d"} %3 = omp.map.info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "ifcond"} - omp.target map_entries(%2 -> %arg1, %3 -> %arg2 : !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%2 -> %arg1, %3 -> %arg2 : !llvm.ptr, !llvm.ptr) { %4 = llvm.mlir.constant(10 : i32) : i32 %5 = llvm.load %arg2 : !llvm.ptr -> i32 %6 = llvm.mlir.constant(0 : i64) : i32 diff --git a/mlir/test/Target/LLVMIR/omptarget-private-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-private-llvm.mlir index b978354e25329..3c2fc422c2987 100644 --- a/mlir/test/Target/LLVMIR/omptarget-private-llvm.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-private-llvm.mlir @@ -15,7 +15,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo %3 = llvm.mlir.constant(1 : i64) : i64 %4 = omp.map.info var_ptr(%2 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "i"} %5 = omp.map.info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "d"} - omp.target map_entries(%4 -> %arg1, %5 -> %arg2 : !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(spmd) map_entries(%4 -> %arg1, %5 -> %arg2 : !llvm.ptr, !llvm.ptr) { %6 = llvm.mlir.constant(1 : i32) : i32 omp.teams { diff --git a/mlir/test/Target/LLVMIR/omptarget-record-type-mapping-host.mlir b/mlir/test/Target/LLVMIR/omptarget-record-type-mapping-host.mlir index 769966c000eaf..0d1c34d561c0d 100644 --- a/mlir/test/Target/LLVMIR/omptarget-record-type-mapping-host.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-record-type-mapping-host.mlir @@ -21,7 +21,7 @@ llvm.func @_QQmain() { %10 = omp.map.bounds lower_bound(%2 : i64) upper_bound(%1 : i64) extent(%0 : i64) stride(%2 : i64) start_idx(%2 : i64) %11 = omp.map.info var_ptr(%9 : !llvm.ptr, 
!llvm.array<10 x i32>) map_clauses(tofrom) capture(ByRef) bounds(%10) -> !llvm.ptr %12 = omp.map.info var_ptr(%4 : !llvm.ptr, !llvm.struct<(f32, array<10 x i32>, i32)>) map_clauses(tofrom) capture(ByRef) members(%7, %11 : [2], [1] : !llvm.ptr, !llvm.ptr) -> !llvm.ptr {partial_map = true} - omp.target map_entries(%7 -> %arg0, %11 -> %arg1, %12 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%7 -> %arg0, %11 -> %arg1, %12 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) { omp.terminator } llvm.return diff --git a/mlir/test/Target/LLVMIR/omptarget-record-type-with-ptr-member-host.mlir b/mlir/test/Target/LLVMIR/omptarget-record-type-with-ptr-member-host.mlir index a25226eb76c5e..3aa381e3fdbf3 100644 --- a/mlir/test/Target/LLVMIR/omptarget-record-type-with-ptr-member-host.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-record-type-with-ptr-member-host.mlir @@ -42,7 +42,7 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a %31 = llvm.getelementptr %5[0, 0] : (!llvm.ptr) -> !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)> %32 = omp.map.info var_ptr(%5 : !llvm.ptr, f32) map_clauses(tofrom) capture(ByRef) var_ptr_ptr(%31 : !llvm.ptr) -> !llvm.ptr {name = "scalar"} %33 = omp.map.info var_ptr(%5 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(tofrom) capture(ByRef) members(%32 : [0] : !llvm.ptr) -> !llvm.ptr {name = "scalar"} - omp.target map_entries(%17 -> %arg0, %18 -> %arg1, %29 -> %arg2, %30 -> %arg3, %32 -> %arg4, %33 -> %arg5 : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%17 -> %arg0, %18 -> %arg1, %29 -> %arg2, %30 -> %arg3, %32 -> %arg4, %33 -> %arg5 : !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr, !llvm.ptr) { omp.terminator } llvm.return diff --git a/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir index 
3ebb79fef7474..cf536a7368070 100644 --- a/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-region-device-llvm.mlir @@ -18,7 +18,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo %map1 = omp.map.info var_ptr(%8 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} %map2 = omp.map.info var_ptr(%9 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} %map3 = omp.map.info var_ptr(%10 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} - omp.target map_entries(%map1 -> %arg0, %map2 -> %arg1, %map3 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%map1 -> %arg0, %map2 -> %arg1, %map3 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) { %11 = llvm.load %arg0 : !llvm.ptr -> i32 %12 = llvm.load %arg1 : !llvm.ptr -> i32 %13 = llvm.add %11, %12 : i32 diff --git a/mlir/test/Target/LLVMIR/omptarget-region-host-only.mlir b/mlir/test/Target/LLVMIR/omptarget-region-host-only.mlir index 333c8c308db96..a657254f91421 100644 --- a/mlir/test/Target/LLVMIR/omptarget-region-host-only.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-region-host-only.mlir @@ -15,7 +15,7 @@ module attributes {omp.is_target_device = false} { %map1 = omp.map.info var_ptr(%3 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} %map2 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} %map3 = omp.map.info var_ptr(%7 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} - omp.target map_entries(%map1 -> %arg0, %map2 -> %arg1, %map3 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%map1 -> %arg0, %map2 -> %arg1, %map3 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) { %8 = llvm.load %arg0 : !llvm.ptr -> i32 %9 = llvm.load %arg1 : !llvm.ptr -> i32 %10 = llvm.add %8, %9 : i32 @@ -26,7 +26,7 
@@ module attributes {omp.is_target_device = false} { } llvm.func @omp_target_no_map() { - omp.target { + omp.target kernel_type(generic) { omp.terminator } llvm.return diff --git a/mlir/test/Target/LLVMIR/omptarget-region-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-region-llvm.mlir index 8b769f2e7d1a4..400a60a8de9ac 100644 --- a/mlir/test/Target/LLVMIR/omptarget-region-llvm.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-region-llvm.mlir @@ -15,7 +15,7 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a %map1 = omp.map.info var_ptr(%3 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} %map2 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} %map3 = omp.map.info var_ptr(%7 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} - omp.target map_entries(%map1 -> %arg0, %map2 -> %arg1, %map3 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%map1 -> %arg0, %map2 -> %arg1, %map3 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) { %8 = llvm.load %arg0 : !llvm.ptr -> i32 %9 = llvm.load %arg1 : !llvm.ptr -> i32 %10 = llvm.add %8, %9 : i32 @@ -26,7 +26,7 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a } llvm.func @omp_target_no_map() { - omp.target { + omp.target kernel_type(generic) { omp.terminator } llvm.return diff --git a/mlir/test/Target/LLVMIR/omptarget-region-parallel-llvm.mlir b/mlir/test/Target/LLVMIR/omptarget-region-parallel-llvm.mlir index c11db4be1aa7c..697759b6903e3 100644 --- a/mlir/test/Target/LLVMIR/omptarget-region-parallel-llvm.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-region-parallel-llvm.mlir @@ -15,7 +15,7 @@ module attributes {omp.is_target_device = false} { %map1 = omp.map.info var_ptr(%3 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} %map2 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(tofrom) 
capture(ByRef) -> !llvm.ptr {name = ""} %map3 = omp.map.info var_ptr(%7 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} - omp.target map_entries( %map1 -> %arg0, %map2 -> %arg1, %map3 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(generic) map_entries( %map1 -> %arg0, %map2 -> %arg1, %map3 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) { omp.parallel { %8 = llvm.load %arg0 : !llvm.ptr -> i32 %9 = llvm.load %arg1 : !llvm.ptr -> i32 diff --git a/mlir/test/Target/LLVMIR/omptarget-runtimecc.mlir b/mlir/test/Target/LLVMIR/omptarget-runtimecc.mlir index a232bd7f91d1c..be68779a17141 100644 --- a/mlir/test/Target/LLVMIR/omptarget-runtimecc.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-runtimecc.mlir @@ -4,7 +4,7 @@ module attributes {omp.is_target_device = true, omp.is_gpu = true, omp.target_t // CHECK: call spir_func i32 @__kmpc_target_init // CHECK: call spir_func void @__kmpc_target_deinit llvm.func @target_if_variable(%x : i1) { - omp.target if(%x) { + omp.target kernel_type(generic) if(%x) { omp.terminator } llvm.return diff --git a/mlir/test/Target/LLVMIR/omptarget-target-cpu-features.mlir b/mlir/test/Target/LLVMIR/omptarget-target-cpu-features.mlir index fddb799142820..087b0fc3cdd29 100644 --- a/mlir/test/Target/LLVMIR/omptarget-target-cpu-features.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-target-cpu-features.mlir @@ -8,7 +8,7 @@ module attributes {omp.is_target_device = false} { target_cpu = "x86-64", target_features = #llvm.target_features<["+mmx", "+sse"]> } { - omp.target { + omp.target kernel_type(generic) { omp.terminator } llvm.return diff --git a/mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction-array-descriptor.mlir b/mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction-array-descriptor.mlir index 84b4a0e71c36f..b59bf8f601405 100644 --- a/mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction-array-descriptor.mlir +++ 
b/mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction-array-descriptor.mlir @@ -26,7 +26,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<"dlti.alloca_memory_space" = 5 : %1 = llvm.alloca %0 x !llvm.array<4 x i32> : (i64) -> !llvm.ptr<5> %2 = llvm.addrspacecast %1 : !llvm.ptr<5> to !llvm.ptr %3 = omp.map.info var_ptr(%2 : !llvm.ptr, !llvm.array<4 x i32>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "red_array"} - omp.target map_entries(%3 -> %arg0 : !llvm.ptr) { + omp.target kernel_type(spmd) map_entries(%3 -> %arg0 : !llvm.ptr) { %4 = llvm.mlir.constant(1 : i32) : i32 %5 = llvm.mlir.constant(1000 : i32) : i32 omp.teams reduction(byref @add_reduction_byref_box_4xi32 %arg0 -> %arg1 : !llvm.ptr) { @@ -89,7 +89,7 @@ module attributes {llvm.target_triple = "nvptx64-nvidia-cuda", omp.is_gpu = true %1 = llvm.alloca %0 x !llvm.array<4 x i32> : (i64) -> !llvm.ptr<5> %2 = llvm.addrspacecast %1 : !llvm.ptr<5> to !llvm.ptr %3 = omp.map.info var_ptr(%2 : !llvm.ptr, !llvm.array<4 x i32>) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "red_array"} - omp.target map_entries(%3 -> %arg0 : !llvm.ptr) { + omp.target kernel_type(spmd) map_entries(%3 -> %arg0 : !llvm.ptr) { %4 = llvm.mlir.constant(1 : i32) : i32 %5 = llvm.mlir.constant(1000 : i32) : i32 omp.teams reduction(byref @add_reduction_byref_box_4xi32 %arg0 -> %arg1 : !llvm.ptr) { diff --git a/mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction.mlir b/mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction.mlir index b7cb1026967f3..baf5330461c57 100644 --- a/mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-teams-distribute-reduction.mlir @@ -27,7 +27,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo llvm.store %6, %2 : i32, !llvm.ptr %9 = omp.map.info var_ptr(%2 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "sum"} %10 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) 
map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "index_"} - omp.target map_entries(%9 -> %arg0, %10 -> %arg1 : !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%9 -> %arg0, %10 -> %arg1 : !llvm.ptr, !llvm.ptr) { %11 = llvm.mlir.constant(10000 : i32) : i32 %12 = llvm.mlir.constant(1 : i32) : i32 omp.teams reduction(@add_reduction_i32 %arg0 -> %arg2 : !llvm.ptr) { diff --git a/mlir/test/Target/LLVMIR/omptarget-teams-reduction.mlir b/mlir/test/Target/LLVMIR/omptarget-teams-reduction.mlir index 36eb280dfcfa2..7c3551d1e96b0 100644 --- a/mlir/test/Target/LLVMIR/omptarget-teams-reduction.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-teams-reduction.mlir @@ -26,7 +26,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo llvm.store %6, %2 : i32, !llvm.ptr %9 = omp.map.info var_ptr(%2 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = "sum"} %10 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "index_"} - omp.target map_entries(%9 -> %arg0, %10 -> %arg1 : !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%9 -> %arg0, %10 -> %arg1 : !llvm.ptr, !llvm.ptr) { %11 = llvm.mlir.constant(0 : index) : i64 %12 = llvm.mlir.constant(10000 : index) : i64 %13 = llvm.mlir.constant(1 : index) : i64 diff --git a/mlir/test/Target/LLVMIR/openmp-data-target-device.mlir b/mlir/test/Target/LLVMIR/openmp-data-target-device.mlir index 53c9b4f559645..28d9e99a2eccd 100644 --- a/mlir/test/Target/LLVMIR/openmp-data-target-device.mlir +++ b/mlir/test/Target/LLVMIR/openmp-data-target-device.mlir @@ -22,17 +22,17 @@ module attributes {omp.target_triples = ["amdgcn-amd-amdhsa"]} { omp.target_data map_entries(%12 : !llvm.ptr) { %13 = omp.map.info var_ptr(%10 : !llvm.ptr, !llvm.array<100 x i32>) map_clauses(from) capture(ByRef) bounds(%11) -> !llvm.ptr {name = "int_array"} %14 = omp.map.info 
var_ptr(%9 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "index_"} - omp.target map_entries(%13 -> %arg0, %14 -> %arg1 : !llvm.ptr, !llvm.ptr) { - %15 = llvm.mlir.constant(100 : i32) : i32 - %16 = llvm.mlir.constant(1 : i32) : i32 + %15 = llvm.mlir.constant(100 : i32) : i32 + %16 = llvm.mlir.constant(1 : i32) : i32 + omp.target kernel_type(spmd) host_eval(%15 -> %arg0, %16 -> %arg1 : i32, i32) map_entries(%13 -> %arg2, %14 -> %arg3 : !llvm.ptr, !llvm.ptr) { %17 = llvm.mlir.constant(100 : index) : i64 omp.parallel { %18 = llvm.mlir.constant(1 : i64) : i64 %19 = llvm.alloca %18 x i32 {pinned} : (i64) -> !llvm.ptr<5> %20 = llvm.addrspacecast %19 : !llvm.ptr<5> to !llvm.ptr omp.wsloop { - omp.loop_nest (%arg2) : i32 = (%16) to (%15) inclusive step (%16) { - llvm.store %arg2, %20 : i32, !llvm.ptr + omp.loop_nest (%arg4) : i32 = (%arg1) to (%arg0) inclusive step (%arg1) { + llvm.store %arg4, %20 : i32, !llvm.ptr %21 = llvm.load %20 : !llvm.ptr -> i32 %22 = llvm.sext %21 : i32 to i64 %23 = llvm.mlir.constant(1 : i64) : i64 @@ -42,7 +42,7 @@ module attributes {omp.target_triples = ["amdgcn-amd-amdhsa"]} { %27 = llvm.mul %26, %23 overflow : i64 %28 = llvm.add %27, %24 overflow : i64 %29 = llvm.mul %23, %17 overflow : i64 - %30 = llvm.getelementptr %arg0[%28] : (!llvm.ptr, i64) -> !llvm.ptr, i32 + %30 = llvm.getelementptr %arg2[%28] : (!llvm.ptr, i64) -> !llvm.ptr, i32 llvm.store %21, %30 : i32, !llvm.ptr omp.yield } diff --git a/mlir/test/Target/LLVMIR/openmp-llvm-invalid.mlir b/mlir/test/Target/LLVMIR/openmp-llvm-invalid.mlir index 73f753dc3f92e..b64236aa37d09 100644 --- a/mlir/test/Target/LLVMIR/openmp-llvm-invalid.mlir +++ b/mlir/test/Target/LLVMIR/openmp-llvm-invalid.mlir @@ -132,7 +132,7 @@ module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_devic // expected-error @below {{unsupported host op found in device}} // expected-error @below {{LLVM Translation failed for operation: 
omp.parallel}} omp.parallel { - omp.target { + omp.target kernel_type(generic) { omp.terminator } omp.terminator @@ -145,7 +145,7 @@ module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_devic module attributes {llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true} { llvm.func @host_op_in_device_sibling_target(%x: !llvm.ptr, %expr: i32) { - omp.target { + omp.target kernel_type(generic) { omp.terminator } // expected-error @below {{unsupported host op found in device}} diff --git a/mlir/test/Target/LLVMIR/openmp-nested-task-target-parallel.mlir b/mlir/test/Target/LLVMIR/openmp-nested-task-target-parallel.mlir index 1589778e0627f..93b70258b0cee 100644 --- a/mlir/test/Target/LLVMIR/openmp-nested-task-target-parallel.mlir +++ b/mlir/test/Target/LLVMIR/openmp-nested-task-target-parallel.mlir @@ -30,17 +30,15 @@ llvm.br ^bb1(%11, %4 : i32, i64) llvm.store %12, %3 : i32, !llvm.ptr omp.task private(@_QFEc_firstprivate_i32 %3 -> %arg0 : !llvm.ptr) { %19 = omp.map.info var_ptr(%1 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "i"} - %20 = omp.map.info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "c"} - %21 = omp.map.info var_ptr(%9 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "chunksz"} - omp.target map_entries(%19 -> %arg1, %20 -> %arg2, %21 -> %arg3 : !llvm.ptr, !llvm.ptr, !llvm.ptr) { - %22 = llvm.mlir.constant(9999 : i32) : i32 - %23 = llvm.mlir.constant(1 : i32) : i32 + %22 = llvm.mlir.constant(9999 : i32) : i32 + %23 = llvm.mlir.constant(1 : i32) : i32 + %24 = llvm.load %arg0 : !llvm.ptr -> i32 + %25 = llvm.add %24, %22 : i32 + omp.target kernel_type(spmd) host_eval(%23 -> %arg1, %24 -> %arg2, %25 -> %arg3 : i32, i32, i32) map_entries(%19 -> %arg4 : !llvm.ptr) { omp.parallel { - %24 = llvm.load %arg2 : !llvm.ptr -> i32 - %25 = llvm.add 
%24, %22 : i32 - omp.wsloop private(@_QFEi_private_i32 %arg1 -> %arg4 : !llvm.ptr) { - omp.loop_nest (%arg5) : i32 = (%24) to (%25) inclusive step (%23) { - llvm.store %arg5, %arg4 : i32, !llvm.ptr + omp.wsloop private(@_QFEi_private_i32 %arg4 -> %arg5 : !llvm.ptr) { + omp.loop_nest (%arg6) : i32 = (%arg2) to (%arg3) inclusive step (%arg1) { + llvm.store %arg6, %arg5 : i32, !llvm.ptr omp.yield } } diff --git a/mlir/test/Target/LLVMIR/openmp-private-allloca-hoisting.mlir b/mlir/test/Target/LLVMIR/openmp-private-allloca-hoisting.mlir index 71c4b9cdede59..88b166f362cf3 100644 --- a/mlir/test/Target/LLVMIR/openmp-private-allloca-hoisting.mlir +++ b/mlir/test/Target/LLVMIR/openmp-private-allloca-hoisting.mlir @@ -66,7 +66,7 @@ llvm.func @parallel_op_private_multi_block(%arg0: !llvm.ptr, %arg1: !llvm.ptr) { %arg1_map = omp.map.info var_ptr(%arg1 : !llvm.ptr, !llvm.ptr) map_clauses(is_device_ptr) capture(ByRef) -> !llvm.ptr {name = ""} - omp.target map_entries(%arg0_map -> %arg0_arg, %arg1_map -> %arg1_arg : !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%arg0_map -> %arg0_arg, %arg1_map -> %arg1_arg : !llvm.ptr, !llvm.ptr) { omp.parallel private(@multi_block.privatizer %arg0_arg -> %arg2, @multi_block.privatizer2 %arg1_arg -> %arg3 : !llvm.ptr, !llvm.ptr) { %0 = llvm.load %arg2 : !llvm.ptr -> f32 diff --git a/mlir/test/Target/LLVMIR/openmp-target-default-as.mlir b/mlir/test/Target/LLVMIR/openmp-target-default-as.mlir index 8344867d5fb7b..d410785c8d6d2 100644 --- a/mlir/test/Target/LLVMIR/openmp-target-default-as.mlir +++ b/mlir/test/Target/LLVMIR/openmp-target-default-as.mlir @@ -13,7 +13,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.program_mem %4 = llvm.addrspacecast %3 : !llvm.ptr<5> to !llvm.ptr llvm.store %1, %4 : i32, !llvm.ptr %map = omp.map.info var_ptr(%4 : !llvm.ptr, i32) map_clauses(tofrom) capture(ByRef) -> !llvm.ptr {name = ""} - omp.target map_entries(%map -> %arg : !llvm.ptr) { + omp.target 
kernel_type(generic) map_entries(%map -> %arg : !llvm.ptr) { omp.terminator } llvm.return diff --git a/mlir/test/Target/LLVMIR/openmp-target-generic-spmd.mlir b/mlir/test/Target/LLVMIR/openmp-target-generic-spmd.mlir index ecfc2dc7b4598..a91abc6ff8719 100644 --- a/mlir/test/Target/LLVMIR/openmp-target-generic-spmd.mlir +++ b/mlir/test/Target/LLVMIR/openmp-target-generic-spmd.mlir @@ -8,7 +8,7 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a llvm.func @main(%arg0 : !llvm.ptr) { %x = llvm.load %arg0 : !llvm.ptr -> i32 %0 = omp.map.info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(to) capture(ByCopy) -> !llvm.ptr - omp.target host_eval(%x -> %lb, %x -> %ub, %x -> %step : i32, i32, i32) map_entries(%0 -> %ptr : !llvm.ptr) { + omp.target kernel_type(spmd) host_eval(%x -> %lb, %x -> %ub, %x -> %step : i32, i32, i32) map_entries(%0 -> %ptr : !llvm.ptr) { %x.map = llvm.load %ptr : !llvm.ptr -> i32 omp.teams { omp.distribute { @@ -60,7 +60,7 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true, omp.is_gpu = true} { llvm.func @main(%arg0 : !llvm.ptr) { %0 = omp.map.info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(to) capture(ByCopy) -> !llvm.ptr - omp.target map_entries(%0 -> %ptr : !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%0 -> %ptr : !llvm.ptr) { %x = llvm.load %ptr : !llvm.ptr -> i32 omp.teams { omp.distribute { diff --git a/mlir/test/Target/LLVMIR/openmp-target-has-device-addr.mlir b/mlir/test/Target/LLVMIR/openmp-target-has-device-addr.mlir index be592242ef6c5..a051bce3e913c 100644 --- a/mlir/test/Target/LLVMIR/openmp-target-has-device-addr.mlir +++ b/mlir/test/Target/LLVMIR/openmp-target-has-device-addr.mlir @@ -12,7 +12,7 @@ module attributes { llvm.target_triple = "x86_64-unknown-linux-gnu", omp.target_ %0 = 
llvm.mlir.constant(1 : i32) : i32 %1 = llvm.alloca %0 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr %41 = omp.map.info var_ptr(%1 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)>) map_clauses(always, implicit, to) capture(ByRef) -> !llvm.ptr {name = "x"} - omp.target has_device_addr(%41 -> %arg1 : !llvm.ptr) { + omp.target kernel_type(generic) has_device_addr(%41 -> %arg1 : !llvm.ptr) { omp.terminator } llvm.return diff --git a/mlir/test/Target/LLVMIR/openmp-target-launch-device.mlir b/mlir/test/Target/LLVMIR/openmp-target-launch-device.mlir index c7f1490240182..c5d06b8869371 100644 --- a/mlir/test/Target/LLVMIR/openmp-target-launch-device.mlir +++ b/mlir/test/Target/LLVMIR/openmp-target-launch-device.mlir @@ -18,7 +18,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo // CHECK: %{{.*}} = call i32 @__kmpc_target_init(ptr @[[KERNEL1_ENV]], ptr %[[KERNEL_ARGS]]) %target_threads = llvm.mlir.constant(20) : i32 %0 = omp.map.info var_ptr(%num_teams : !llvm.ptr, i32) map_clauses(to) capture(ByCopy) -> !llvm.ptr - omp.target thread_limit(%target_threads : i32) map_entries(%0 -> %arg_teams : !llvm.ptr) { + omp.target kernel_type(generic) thread_limit(%target_threads : i32) map_entries(%0 -> %arg_teams : !llvm.ptr) { %teams_threads = llvm.mlir.constant(10) : i32 %num_teams1 = llvm.load %arg_teams : !llvm.ptr -> i32 omp.teams num_teams(to %num_teams1 : i32) thread_limit(%teams_threads : i32) { @@ -30,7 +30,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memo // CHECK: define weak_odr protected amdgpu_kernel void @__omp_offloading_{{.*}}_main_l{{[0-9]+}}(ptr %[[KERNEL_ARGS:.*]]) #[[ATTRS2:[0-9]+]] // CHECK: %{{.*}} = call i32 @__kmpc_target_init(ptr @[[KERNEL2_ENV]], ptr %[[KERNEL_ARGS]]) %target_threads2 = llvm.mlir.constant(30) : i32 - omp.target thread_limit(%target_threads2 : i32) { + omp.target 
kernel_type(generic) thread_limit(%target_threads2 : i32) { %num_teams2 = llvm.mlir.constant(40) : i32 omp.teams num_teams(to %num_teams2 : i32) { omp.terminator diff --git a/mlir/test/Target/LLVMIR/openmp-target-launch-host.mlir b/mlir/test/Target/LLVMIR/openmp-target-launch-host.mlir index abc67017b620d..b08084a280914 100644 --- a/mlir/test/Target/LLVMIR/openmp-target-launch-host.mlir +++ b/mlir/test/Target/LLVMIR/openmp-target-launch-host.mlir @@ -19,7 +19,7 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a llvm.func @main(%num_teams : i32) { %target_threads = llvm.mlir.constant(20) : i32 %teams_threads = llvm.mlir.constant(10) : i32 - omp.target thread_limit(%target_threads : i32) + omp.target kernel_type(generic) thread_limit(%target_threads : i32) host_eval(%num_teams -> %arg_teams, %teams_threads -> %arg_teams_threads : i32, i32) { omp.teams num_teams(to %arg_teams : i32) thread_limit(%arg_teams_threads : i32) { omp.terminator diff --git a/mlir/test/Target/LLVMIR/openmp-target-multiple-private.mlir b/mlir/test/Target/LLVMIR/openmp-target-multiple-private.mlir index a47955cc28e15..aaaf7926a70d1 100644 --- a/mlir/test/Target/LLVMIR/openmp-target-multiple-private.mlir +++ b/mlir/test/Target/LLVMIR/openmp-target-multiple-private.mlir @@ -29,7 +29,7 @@ llvm.func @target_allocatable_(%arg0: !llvm.ptr {fir.bindc_name = "lb"}, %arg1: %53 = omp.map.info var_ptr(%7 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "mapped_var"} %54 = omp.map.info var_ptr(%13 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(to) capture(ByRef) -> !llvm.ptr %55 = omp.map.info var_ptr(%14 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(to) capture(ByRef) -> !llvm.ptr - omp.target map_entries(%53 -> %arg3, %54 -> %arg4, %55 ->%arg5 : !llvm.ptr, !llvm.ptr, !llvm.ptr) private(@box.heap_privatizer0 %13 -> %arg6 [map_idx=1], @box.heap_privatizer1 %14 -> %arg7 
[map_idx=2]: !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%53 -> %arg3, %54 -> %arg4, %55 ->%arg5 : !llvm.ptr, !llvm.ptr, !llvm.ptr) private(@box.heap_privatizer0 %13 -> %arg6 [map_idx=1], @box.heap_privatizer1 %14 -> %arg7 [map_idx=2]: !llvm.ptr, !llvm.ptr) { llvm.call @use_private_var0(%arg6) : (!llvm.ptr) -> () llvm.call @use_private_var1(%arg7) : (!llvm.ptr) -> () omp.terminator diff --git a/mlir/test/Target/LLVMIR/openmp-target-private-allocatable.mlir b/mlir/test/Target/LLVMIR/openmp-target-private-allocatable.mlir index 2aa11f3a1aa34..a57c5cc1c328d 100644 --- a/mlir/test/Target/LLVMIR/openmp-target-private-allocatable.mlir +++ b/mlir/test/Target/LLVMIR/openmp-target-private-allocatable.mlir @@ -35,7 +35,7 @@ llvm.func @target_allocatable_(%arg0: !llvm.ptr {fir.bindc_name = "lb"}, %arg1: %52 = llvm.alloca %39 x f32 {bindc_name = "real_arr"} : (i64) -> !llvm.ptr %53 = omp.map.info var_ptr(%7 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "mapped_var"} %54 = omp.map.info var_ptr(%13 : !llvm.ptr, !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8)>) map_clauses(to) capture(ByRef) -> !llvm.ptr - omp.target map_entries(%53 -> %arg3, %54 -> %arg4 : !llvm.ptr, !llvm.ptr) private(@box.heap_privatizer %13 -> %arg5 [map_idx=1] : !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%53 -> %arg3, %54 -> %arg4 : !llvm.ptr, !llvm.ptr) private(@box.heap_privatizer %13 -> %arg5 [map_idx=1] : !llvm.ptr) { llvm.call @use_private_var(%arg5) : (!llvm.ptr) -> () omp.terminator } diff --git a/mlir/test/Target/LLVMIR/openmp-target-private-shared-mem.mlir b/mlir/test/Target/LLVMIR/openmp-target-private-shared-mem.mlir index 1481d8133cb0c..4df2a504779bc 100644 --- a/mlir/test/Target/LLVMIR/openmp-target-private-shared-mem.mlir +++ b/mlir/test/Target/LLVMIR/openmp-target-private-shared-mem.mlir @@ -27,7 +27,7 @@ module attributes {omp.is_target_device = true, llvm.target_triple = "amdgcn-amd // CHECK: 
call void @device_func(ptr %[[ALLOC0]]) // CHECK: call void @__kmpc_free_shared(ptr %[[ALLOC0]], i64 4) // CHECK: call void @__kmpc_target_deinit - omp.target private(@simple_var.privatizer %2 -> %arg0 : !llvm.ptr) { + omp.target kernel_type(generic) private(@simple_var.privatizer %2 -> %arg0 : !llvm.ptr) { llvm.call @device_func(%arg0) : (!llvm.ptr) -> () omp.terminator } @@ -44,7 +44,7 @@ module attributes {omp.is_target_device = true, llvm.target_triple = "amdgcn-amd // CHECK: call void @__kmpc_free_shared(ptr %[[ALLOC_ARGS0]], i64 8) // CHECK: call void @__kmpc_free_shared(ptr %[[ALLOC1]], i64 4) // CHECK: call void @__kmpc_target_deinit - omp.target private(@simple_var.privatizer %2 -> %arg0 : !llvm.ptr) { + omp.target kernel_type(generic) private(@simple_var.privatizer %2 -> %arg0 : !llvm.ptr) { omp.parallel reduction(@simple_var.reducer %arg0 -> %arg1 : !llvm.ptr) { %3 = llvm.load %arg1 : !llvm.ptr -> i32 omp.terminator @@ -64,7 +64,7 @@ module attributes {omp.is_target_device = true, llvm.target_triple = "amdgcn-amd // CHECK: call void @__kmpc_free_shared(ptr %[[ALLOC_ARGS1]], i64 8) // CHECK: call void @__kmpc_free_shared(ptr %[[ALLOC2]], i64 4) // CHECK: call void @__kmpc_target_deinit - omp.target private(@simple_var.privatizer %2 -> %arg0 : !llvm.ptr) { + omp.target kernel_type(generic) private(@simple_var.privatizer %2 -> %arg0 : !llvm.ptr) { omp.parallel { %4 = llvm.load %arg0 : !llvm.ptr -> i32 omp.terminator diff --git a/mlir/test/Target/LLVMIR/openmp-target-private.mlir b/mlir/test/Target/LLVMIR/openmp-target-private.mlir index 41927f6e8c26e..9d84b8ba462c0 100644 --- a/mlir/test/Target/LLVMIR/openmp-target-private.mlir +++ b/mlir/test/Target/LLVMIR/openmp-target-private.mlir @@ -9,7 +9,7 @@ llvm.func @target_map_single_private() attributes {fir.internal_name = "_QPtarge %4 = llvm.mlir.constant(2 : i32) : i32 llvm.store %4, %3 : i32, !llvm.ptr %5 = omp.map.info var_ptr(%3 : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = "a"} - 
omp.target map_entries(%5 -> %arg0 : !llvm.ptr) private(@simple_var.privatizer %1 -> %arg1 : !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%5 -> %arg0 : !llvm.ptr) private(@simple_var.privatizer %1 -> %arg1 : !llvm.ptr) { %6 = llvm.mlir.constant(10 : i32) : i32 %7 = llvm.load %arg0 : !llvm.ptr -> i32 %8 = llvm.add %7, %6 : i32 @@ -29,7 +29,7 @@ llvm.func @target_map_2_privates() attributes {fir.internal_name = "_QPtarget_ma %6 = llvm.mlir.constant(2 : i32) : i32 llvm.store %6, %5 : i32, !llvm.ptr %7 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr {name = "a"} - omp.target map_entries(%7 -> %arg0 : !llvm.ptr) private(@simple_var.privatizer %1 -> %arg1, @n.privatizer %3 -> %arg2 : !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%7 -> %arg0 : !llvm.ptr) private(@simple_var.privatizer %1 -> %arg1, @n.privatizer %3 -> %arg2 : !llvm.ptr, !llvm.ptr) { %8 = llvm.mlir.constant(1.100000e+01 : f32) : f32 %9 = llvm.mlir.constant(10 : i32) : i32 %10 = llvm.load %arg0 : !llvm.ptr -> i32 @@ -58,7 +58,7 @@ omp.private {type = private} @multi_block.privatizer : f32 init { } llvm.func @target_op_private_multi_block(%arg0: !llvm.ptr) { - omp.target private(@multi_block.privatizer %arg0 -> %arg2 : !llvm.ptr) { + omp.target kernel_type(generic) private(@multi_block.privatizer %arg0 -> %arg2 : !llvm.ptr) { %0 = llvm.load %arg2 : !llvm.ptr -> f32 omp.terminator } @@ -101,7 +101,7 @@ llvm.func @target_boxchar_(%arg0: !llvm.ptr {fir.bindc_name = "l"}) attributes { %13 = omp.map.info var_ptr(%1 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "mapped_var"} llvm.store %12, %3 : !llvm.struct<(ptr, i64)>, !llvm.ptr %14 = omp.map.info var_ptr(%3 : !llvm.ptr, !llvm.struct<(ptr, i64)>) map_clauses(to) capture(ByRef) -> !llvm.ptr - omp.target map_entries(%13 -> %arg1, %14 -> %arg2 : !llvm.ptr, !llvm.ptr) private(@_QFtarget_boxcharEchar_var_private_boxchar_c8xU %12 -> 
%arg3 [map_idx=1] : !llvm.struct<(ptr, i64)>) { + omp.target kernel_type(generic) map_entries(%13 -> %arg1, %14 -> %arg2 : !llvm.ptr, !llvm.ptr) private(@_QFtarget_boxcharEchar_var_private_boxchar_c8xU %12 -> %arg3 [map_idx=1] : !llvm.struct<(ptr, i64)>) { %15 = llvm.mlir.constant(0 : index) : i64 %16 = llvm.mlir.constant(32 : i8) : i8 %17 = llvm.mlir.constant(1 : index) : i64 @@ -164,7 +164,7 @@ llvm.func @target_firstprivate_() attributes {fir.internal_name = "_QPtarget_fir %sf = llvm.alloca %0 x f32 {bindc_name = "sf"} : (i64) -> !llvm.ptr %6 = omp.map.info var_ptr(%sv : !llvm.ptr, i32) map_clauses(to) capture(ByRef) -> !llvm.ptr %7 = omp.map.info var_ptr(%sf : !llvm.ptr, f32) map_clauses(to) capture(ByRef) -> !llvm.ptr - omp.target map_entries(%6 -> %arg0, %7 -> %arg1 : !llvm.ptr, !llvm.ptr) private(@sv.firstprivate %sv -> %arg2 [map_idx=0], @sf.firstprivate %sf -> %arg3 [map_idx=1] : !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(generic) map_entries(%6 -> %arg0, %7 -> %arg1 : !llvm.ptr, !llvm.ptr) private(@sv.firstprivate %sv -> %arg2 [map_idx=0], @sf.firstprivate %sf -> %arg3 [map_idx=1] : !llvm.ptr, !llvm.ptr) { %8 = llvm.mlir.constant(2.000000e+00 : f64) : f64 %9 = llvm.mlir.constant(10 : i32) : i32 %10 = llvm.load %arg2 : !llvm.ptr -> i32 diff --git a/mlir/test/Target/LLVMIR/openmp-target-simd-on_device.mlir b/mlir/test/Target/LLVMIR/openmp-target-simd-on_device.mlir index 5c971206731e4..387f2bd21a349 100644 --- a/mlir/test/Target/LLVMIR/openmp-target-simd-on_device.mlir +++ b/mlir/test/Target/LLVMIR/openmp-target-simd-on_device.mlir @@ -7,7 +7,7 @@ module attributes {omp.is_target_device = true} { } llvm.func @test_target_simd() { - omp.target { + omp.target kernel_type(generic) { %5 = llvm.mlir.constant(1 : i32) : i32 %x = llvm.alloca %5 x i32 {bindc_name = "x"} : (i32) -> !llvm.ptr omp.simd private(@simd_privatizer %x -> %arg1 : !llvm.ptr) { diff --git a/mlir/test/Target/LLVMIR/openmp-target-spmd.mlir b/mlir/test/Target/LLVMIR/openmp-target-spmd.mlir 
index dae80baad502b..4befc687f05cc 100644 --- a/mlir/test/Target/LLVMIR/openmp-target-spmd.mlir +++ b/mlir/test/Target/LLVMIR/openmp-target-spmd.mlir @@ -6,7 +6,7 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-amd-amdhsa"]} { llvm.func @main(%x : i32) { - omp.target host_eval(%x -> %lb, %x -> %ub, %x -> %step : i32, i32, i32) { + omp.target kernel_type(spmd) host_eval(%x -> %lb, %x -> %ub, %x -> %step : i32, i32, i32) { omp.teams { omp.parallel { omp.distribute { @@ -52,7 +52,7 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a module attributes {dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui32>>, llvm.target_triple = "amdgcn-amd-amdhsa", omp.is_target_device = true, omp.is_gpu = true} { llvm.func @main(%x : i32) { - omp.target host_eval(%x -> %lb, %x -> %ub, %x -> %step : i32, i32, i32) { + omp.target kernel_type(spmd) host_eval(%x -> %lb, %x -> %ub, %x -> %step : i32, i32, i32) { omp.teams { omp.parallel { omp.distribute { diff --git a/mlir/test/Target/LLVMIR/openmp-target-wsloop-private.mlir b/mlir/test/Target/LLVMIR/openmp-target-wsloop-private.mlir index 4b5bb2c3c99af..cffe71e684ebc 100644 --- a/mlir/test/Target/LLVMIR/openmp-target-wsloop-private.mlir +++ b/mlir/test/Target/LLVMIR/openmp-target-wsloop-private.mlir @@ -13,7 +13,7 @@ omp.private {type = private} @impure_alloca_privatizer : !llvm.ptr init { } llvm.func @test_alloca_ip_workaround() { - omp.target { + omp.target kernel_type(generic) { %65 = llvm.mlir.constant(1 : i32) : i32 %66 = llvm.alloca %65 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> {alignment = 8 : i64} : (i32) -> !llvm.ptr %67 = llvm.mlir.constant(0 : index) : i64 diff --git a/mlir/test/Target/LLVMIR/openmp-teams-clauses-trunc-ext.mlir b/mlir/test/Target/LLVMIR/openmp-teams-clauses-trunc-ext.mlir index 1a6d7e04ae8a0..6e978416db189 100644 --- a/mlir/test/Target/LLVMIR/openmp-teams-clauses-trunc-ext.mlir +++ 
b/mlir/test/Target/LLVMIR/openmp-teams-clauses-trunc-ext.mlir @@ -22,7 +22,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vec %4 = llvm.load %arg0 : !llvm.ptr -> i32 %5 = omp.map.info var_ptr(%3 : !llvm.ptr, i32) map_clauses(implicit) capture(ByCopy) -> !llvm.ptr {name = "i"} %6 = omp.map.info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(implicit) capture(ByCopy) -> !llvm.ptr {name = "n"} - omp.target host_eval(%0 -> %arg1, %4 -> %arg2, %0 -> %arg3, %1 -> %arg4 : i32, i32, i32, i64) map_entries(%5 -> %arg5, %6 -> %arg6 : !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(generic) host_eval(%0 -> %arg1, %4 -> %arg2, %0 -> %arg3, %1 -> %arg4 : i32, i32, i32, i64) map_entries(%5 -> %arg5, %6 -> %arg6 : !llvm.ptr, !llvm.ptr) { omp.teams num_teams( to %arg4 : i64) { omp.distribute private(@_QFnum_teams_const_8Ei_private_i32 %arg5 -> %arg7 : !llvm.ptr) { omp.loop_nest (%arg8) : i32 = (%arg1) to (%arg2) inclusive step (%arg3) { @@ -48,7 +48,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vec %5 = omp.map.info var_ptr(%arg1 : !llvm.ptr, i64) map_clauses(implicit) capture(ByCopy) -> !llvm.ptr {name = "t"} %6 = omp.map.info var_ptr(%2 : !llvm.ptr, i32) map_clauses(implicit) capture(ByCopy) -> !llvm.ptr {name = "i"} %7 = omp.map.info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(implicit) capture(ByCopy) -> !llvm.ptr {name = "n"} - omp.target host_eval(%0 -> %arg2, %3 -> %arg3, %0 -> %arg4, %4 -> %arg5 : i32, i32, i32, i64) map_entries(%5 -> %arg6, %6 -> %arg7, %7 -> %arg8 : !llvm.ptr, !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(generic) host_eval(%0 -> %arg2, %3 -> %arg3, %0 -> %arg4, %4 -> %arg5 : i32, i32, i32, i64) map_entries(%5 -> %arg6, %6 -> %arg7, %7 -> %arg8 : !llvm.ptr, !llvm.ptr, !llvm.ptr) { omp.teams num_teams( to %arg5 : i64) { omp.distribute private(@_QFnum_teams_arg_8Ei_private_i32 %arg7 -> %arg9 : !llvm.ptr) { omp.loop_nest (%arg10) : i32 = (%arg2) to (%arg3) inclusive step (%arg4) { @@ -75,7 +75,7 @@ module attributes 
{dlti.dl_spec = #dlti.dl_spec = dense<32> : vec %4 = llvm.load %arg0 : !llvm.ptr -> i32 %5 = omp.map.info var_ptr(%3 : !llvm.ptr, i32) map_clauses(implicit) capture(ByCopy) -> !llvm.ptr {name = "i"} %6 = omp.map.info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(implicit) capture(ByCopy) -> !llvm.ptr {name = "n"} - omp.target host_eval(%0 -> %arg1, %4 -> %arg2, %0 -> %arg3, %1 -> %arg4 : i32, i32, i32, i16) map_entries(%5 -> %arg5, %6 -> %arg6 : !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(generic) host_eval(%0 -> %arg1, %4 -> %arg2, %0 -> %arg3, %1 -> %arg4 : i32, i32, i32, i16) map_entries(%5 -> %arg5, %6 -> %arg6 : !llvm.ptr, !llvm.ptr) { omp.teams num_teams( to %arg4 : i16) { omp.distribute private(@_QFnum_teams_const_2Ei_private_i32 %arg5 -> %arg7 : !llvm.ptr) { omp.loop_nest (%arg8) : i32 = (%arg1) to (%arg2) inclusive step (%arg3) { @@ -101,7 +101,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vec %5 = omp.map.info var_ptr(%arg1 : !llvm.ptr, i16) map_clauses(implicit) capture(ByCopy) -> !llvm.ptr {name = "t"} %6 = omp.map.info var_ptr(%2 : !llvm.ptr, i32) map_clauses(implicit) capture(ByCopy) -> !llvm.ptr {name = "i"} %7 = omp.map.info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(implicit) capture(ByCopy) -> !llvm.ptr {name = "n"} - omp.target host_eval(%0 -> %arg2, %3 -> %arg3, %0 -> %arg4, %4 -> %arg5 : i32, i32, i32, i16) map_entries(%5 -> %arg6, %6 -> %arg7, %7 -> %arg8 : !llvm.ptr, !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(generic) host_eval(%0 -> %arg2, %3 -> %arg3, %0 -> %arg4, %4 -> %arg5 : i32, i32, i32, i16) map_entries(%5 -> %arg6, %6 -> %arg7, %7 -> %arg8 : !llvm.ptr, !llvm.ptr, !llvm.ptr) { omp.teams num_teams( to %arg5 : i16) { omp.distribute private(@_QFnum_teams_arg_2Ei_private_i32 %arg7 -> %arg9 : !llvm.ptr) { omp.loop_nest (%arg10) : i32 = (%arg2) to (%arg3) inclusive step (%arg4) { @@ -127,7 +127,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vec %3 = llvm.load %arg0 : !llvm.ptr -> i32 %4 = 
omp.map.info var_ptr(%2 : !llvm.ptr, i32) map_clauses(implicit) capture(ByCopy) -> !llvm.ptr {name = "i"} %5 = omp.map.info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(implicit) capture(ByCopy) -> !llvm.ptr {name = "n"} - omp.target host_eval(%0 -> %arg1, %3 -> %arg2, %0 -> %arg3 : i32, i32, i32) map_entries(%4 -> %arg4, %5 -> %arg5 : !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(generic) host_eval(%0 -> %arg1, %3 -> %arg2, %0 -> %arg3 : i32, i32, i32) map_entries(%4 -> %arg4, %5 -> %arg5 : !llvm.ptr, !llvm.ptr) { %6 = llvm.mlir.constant(137 : i64) : i64 omp.teams thread_limit(%6 : i64) { omp.distribute private(@_QFthread_limit_const_8Ei_private_i32 %arg4 -> %arg6 : !llvm.ptr) { @@ -153,7 +153,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vec %4 = omp.map.info var_ptr(%arg1 : !llvm.ptr, i64) map_clauses(implicit) capture(ByCopy) -> !llvm.ptr {name = "t"} %5 = omp.map.info var_ptr(%2 : !llvm.ptr, i32) map_clauses(implicit) capture(ByCopy) -> !llvm.ptr {name = "i"} %6 = omp.map.info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(implicit) capture(ByCopy) -> !llvm.ptr {name = "n"} - omp.target host_eval(%0 -> %arg2, %3 -> %arg3, %0 -> %arg4 : i32, i32, i32) map_entries(%4 -> %arg5, %5 -> %arg6, %6 -> %arg7 : !llvm.ptr, !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(generic) host_eval(%0 -> %arg2, %3 -> %arg3, %0 -> %arg4 : i32, i32, i32) map_entries(%4 -> %arg5, %5 -> %arg6, %6 -> %arg7 : !llvm.ptr, !llvm.ptr, !llvm.ptr) { %7 = llvm.load %arg5 : !llvm.ptr -> i64 omp.teams thread_limit(%7 : i64) { omp.distribute private(@_QFthread_limit_arg_8Ei_private_i32 %arg6 -> %arg8 : !llvm.ptr) { @@ -180,7 +180,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vec %3 = llvm.load %arg0 : !llvm.ptr -> i32 %4 = omp.map.info var_ptr(%2 : !llvm.ptr, i32) map_clauses(implicit) capture(ByCopy) -> !llvm.ptr {name = "i"} %5 = omp.map.info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(implicit) capture(ByCopy) -> !llvm.ptr {name = "n"} - omp.target 
host_eval(%0 -> %arg1, %3 -> %arg2, %0 -> %arg3 : i32, i32, i32) map_entries(%4 -> %arg4, %5 -> %arg5 : !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(generic) host_eval(%0 -> %arg1, %3 -> %arg2, %0 -> %arg3 : i32, i32, i32) map_entries(%4 -> %arg4, %5 -> %arg5 : !llvm.ptr, !llvm.ptr) { %6 = llvm.mlir.constant(137 : i16) : i16 omp.teams thread_limit(%6 : i16) { omp.distribute private(@_QFthread_limit_const_2Ei_private_i32 %arg4 -> %arg6 : !llvm.ptr) { @@ -206,7 +206,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vec %4 = omp.map.info var_ptr(%arg1 : !llvm.ptr, i16) map_clauses(implicit) capture(ByCopy) -> !llvm.ptr {name = "t"} %5 = omp.map.info var_ptr(%2 : !llvm.ptr, i32) map_clauses(implicit) capture(ByCopy) -> !llvm.ptr {name = "i"} %6 = omp.map.info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(implicit) capture(ByCopy) -> !llvm.ptr {name = "n"} - omp.target host_eval(%0 -> %arg2, %3 -> %arg3, %0 -> %arg4 : i32, i32, i32) map_entries(%4 -> %arg5, %5 -> %arg6, %6 -> %arg7 : !llvm.ptr, !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(generic) host_eval(%0 -> %arg2, %3 -> %arg3, %0 -> %arg4 : i32, i32, i32) map_entries(%4 -> %arg5, %5 -> %arg6, %6 -> %arg7 : !llvm.ptr, !llvm.ptr, !llvm.ptr) { %7 = llvm.load %arg5 : !llvm.ptr -> i16 omp.teams thread_limit(%7 : i16) { omp.distribute private(@_QFthread_limit_arg_2Ei_private_i32 %arg6 -> %arg8 : !llvm.ptr) { @@ -234,7 +234,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vec %4 = llvm.load %arg0 : !llvm.ptr -> i32 %5 = omp.map.info var_ptr(%3 : !llvm.ptr, i32) map_clauses(implicit) capture(ByCopy) -> !llvm.ptr {name = "i"} %6 = omp.map.info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(implicit) capture(ByCopy) -> !llvm.ptr {name = "n"} - omp.target host_eval(%1 -> %arg1, %4 -> %arg2, %1 -> %arg3, %0 -> %arg4 : i32, i32, i32, i64) map_entries(%5 -> %arg5, %6 -> %arg6 : !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(spmd) host_eval(%1 -> %arg1, %4 -> %arg2, %1 -> %arg3, %0 -> 
%arg4 : i32, i32, i32, i64) map_entries(%5 -> %arg5, %6 -> %arg6 : !llvm.ptr, !llvm.ptr) { omp.teams { omp.parallel num_threads(%arg4 : i64) private(@_QFnum_threads_const_8Ei_private_i32 %arg5 -> %arg7 : !llvm.ptr) { omp.distribute { @@ -266,7 +266,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vec %6 = omp.map.info var_ptr(%2 : !llvm.ptr, i64) map_clauses(implicit) capture(ByCopy) -> !llvm.ptr {name = "t"} %7 = omp.map.info var_ptr(%3 : !llvm.ptr, i32) map_clauses(implicit) capture(ByCopy) -> !llvm.ptr {name = "i"} %8 = omp.map.info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(implicit) capture(ByCopy) -> !llvm.ptr {name = "n"} - omp.target host_eval(%0 -> %arg1, %5 -> %arg2, %0 -> %arg3, %4 -> %arg4 : i32, i32, i32, i64) map_entries(%6 -> %arg5, %7 -> %arg6, %8 -> %arg7 : !llvm.ptr, !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(spmd) host_eval(%0 -> %arg1, %5 -> %arg2, %0 -> %arg3, %4 -> %arg4 : i32, i32, i32, i64) map_entries(%6 -> %arg5, %7 -> %arg6, %8 -> %arg7 : !llvm.ptr, !llvm.ptr, !llvm.ptr) { omp.teams { omp.parallel num_threads(%arg4 : i64) private(@_QFnum_threads_arg_8Ei_private_i32 %arg6 -> %arg8 : !llvm.ptr) { omp.distribute { @@ -297,7 +297,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vec %4 = llvm.load %arg0 : !llvm.ptr -> i32 %5 = omp.map.info var_ptr(%3 : !llvm.ptr, i32) map_clauses(implicit) capture(ByCopy) -> !llvm.ptr {name = "i"} %6 = omp.map.info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(implicit) capture(ByCopy) -> !llvm.ptr {name = "n"} - omp.target host_eval(%1 -> %arg1, %4 -> %arg2, %1 -> %arg3, %0 -> %arg4 : i32, i32, i32, i16) map_entries(%5 -> %arg5, %6 -> %arg6 : !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(spmd) host_eval(%1 -> %arg1, %4 -> %arg2, %1 -> %arg3, %0 -> %arg4 : i32, i32, i32, i16) map_entries(%5 -> %arg5, %6 -> %arg6 : !llvm.ptr, !llvm.ptr) { omp.teams { omp.parallel num_threads(%arg4 : i16) private(@_QFnum_threads_const_2Ei_private_i32 %arg5 -> %arg7 : !llvm.ptr) { 
omp.distribute { @@ -328,7 +328,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec = dense<32> : vec %5 = omp.map.info var_ptr(%arg1 : !llvm.ptr, i16) map_clauses(implicit) capture(ByCopy) -> !llvm.ptr {name = "t"} %6 = omp.map.info var_ptr(%2 : !llvm.ptr, i32) map_clauses(implicit) capture(ByCopy) -> !llvm.ptr {name = "i"} %7 = omp.map.info var_ptr(%arg0 : !llvm.ptr, i32) map_clauses(implicit) capture(ByCopy) -> !llvm.ptr {name = "n"} - omp.target host_eval(%0 -> %arg2, %4 -> %arg3, %0 -> %arg4, %3 -> %arg5 : i32, i32, i32, i16) map_entries(%5 -> %arg6, %6 -> %arg7, %7 -> %arg8 : !llvm.ptr, !llvm.ptr, !llvm.ptr) { + omp.target kernel_type(spmd) host_eval(%0 -> %arg2, %4 -> %arg3, %0 -> %arg4, %3 -> %arg5 : i32, i32, i32, i16) map_entries(%5 -> %arg6, %6 -> %arg7, %7 -> %arg8 : !llvm.ptr, !llvm.ptr, !llvm.ptr) { omp.teams { omp.parallel num_threads(%arg5 : i16) private(@_QFnum_threads_arg_2Ei_private_i32 %arg7 -> %arg9 : !llvm.ptr) { omp.distribute { diff --git a/mlir/test/Target/LLVMIR/openmp-todo.mlir b/mlir/test/Target/LLVMIR/openmp-todo.mlir index 9a10ad74baeb6..94e66829bb3bb 100644 --- a/mlir/test/Target/LLVMIR/openmp-todo.mlir +++ b/mlir/test/Target/LLVMIR/openmp-todo.mlir @@ -165,7 +165,7 @@ llvm.func @single_private(%x : !llvm.ptr) { llvm.func @target_allocate(%x : !llvm.ptr) { // expected-error@below {{not yet implemented: Unhandled clause allocate in omp.target operation}} // expected-error@below {{LLVM Translation failed for operation: omp.target}} - omp.target allocate(%x : !llvm.ptr -> %x : !llvm.ptr) { + omp.target kernel_type(generic) allocate(%x : !llvm.ptr -> %x : !llvm.ptr) { omp.terminator } llvm.return @@ -193,7 +193,7 @@ atomic { llvm.func @target_in_reduction(%x : !llvm.ptr) { // expected-error@below {{not yet implemented: Unhandled clause in_reduction in omp.target operation}} // expected-error@below {{LLVM Translation failed for operation: omp.target}} - omp.target in_reduction(@add_f32 %x -> %prv : !llvm.ptr) { + omp.target 
kernel_type(generic) in_reduction(@add_f32 %x -> %prv : !llvm.ptr) { omp.terminator } llvm.return