diff --git a/mlir/include/mlir/Dialect/OpenACC/Transforms/Passes.h b/mlir/include/mlir/Dialect/OpenACC/Transforms/Passes.h index 57d532b078b9e..27f65aa15f040 100644 --- a/mlir/include/mlir/Dialect/OpenACC/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/OpenACC/Transforms/Passes.h @@ -9,6 +9,9 @@ #ifndef MLIR_DIALECT_OPENACC_TRANSFORMS_PASSES_H #define MLIR_DIALECT_OPENACC_TRANSFORMS_PASSES_H +#include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/Dialect/OpenACC/OpenACC.h" #include "mlir/Pass/Pass.h" namespace mlir { diff --git a/mlir/include/mlir/Dialect/OpenACC/Transforms/Passes.td b/mlir/include/mlir/Dialect/OpenACC/Transforms/Passes.td index 9ceb91e5679a1..40ccd1fc6c1a0 100644 --- a/mlir/include/mlir/Dialect/OpenACC/Transforms/Passes.td +++ b/mlir/include/mlir/Dialect/OpenACC/Transforms/Passes.td @@ -27,4 +27,40 @@ def LegalizeDataValuesInRegion : Pass<"openacc-legalize-data-values", "mlir::fun ]; } +def ACCImplicitData : Pass<"acc-implicit-data", "mlir::ModuleOp"> { + let summary = "Generate implicit data attributes for OpenACC compute constructs"; + let description = [{ + This pass implements the OpenACC specification for "Variables with + Implicitly Determined Data Attributes" (OpenACC 3.4 spec, section 2.6.2). + + The pass automatically generates data clause operations for variables used + within OpenACC compute constructs (parallel, kernels, serial) that do not + already have explicit data clauses. The semantics follow these rules: + + 1. If there is a default(none) clause visible, no implicit data actions + apply. + + 2. An aggregate variable (arrays, derived types, etc.) will be treated as: + - In a present clause when default(present) is visible. + - In a copy clause otherwise. + + 3. A scalar variable will be treated as if it appears in: + - A copy clause if the compute construct is a kernels construct. + - A firstprivate clause otherwise (parallel, serial). + }]; + let dependentDialects = ["mlir::acc::OpenACCDialect", + "mlir::memref::MemRefDialect", + "mlir::arith::ArithDialect"]; + let options = [ + Option<"enableImplicitReductionCopy", "enable-implicit-reduction-copy", + "bool", "true", + "Enable applying implicit copy in lieu of implicit firstprivate for " + "reduction variables. This allows uniform treatment of reduction " + "variables between combined constructs (e.g., 'parallel loop') and " + "separate constructs (e.g., 'parallel' followed by 'loop'), where " + "the OpenACC spec requires copy semantics for the former but " + "firstprivate would normally apply for the latter."> + ]; +} + #endif // MLIR_DIALECT_OPENACC_TRANSFORMS_PASSES diff --git a/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp new file mode 100644 index 0000000000000..a99e4846eea20 --- /dev/null +++ b/mlir/lib/Dialect/OpenACC/Transforms/ACCImplicitData.cpp @@ -0,0 +1,880 @@ +//===- ACCImplicitData.cpp ------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass implements the OpenACC specification for "Variables with +// Implicitly Determined Data Attributes" (OpenACC 3.4 spec, section 2.6.2). +// +// Overview: +// --------- +// The pass automatically generates data clause operations for variables used +// within OpenACC compute constructs (parallel, kernels, serial) that do not +// already have explicit data clauses. The semantics follow these rules: +// +// 1. If there is a default(none) clause visible, no implicit data actions +// apply. +// +// 2. An aggregate variable (arrays, derived types, etc.) will be treated as: +// - In a present clause when default(present) is visible. +// - In a copy clause otherwise. +// +// 3. A scalar variable will be treated as if it appears in: +// - A copy clause if the compute construct is a kernels construct. +// - A firstprivate clause otherwise (parallel, serial). +// +// Requirements: +// ------------- +// To use this pass in a pipeline, the following requirements must be met: +// +// 1. Type Interface Implementation: Variables from the dialect being used +// must implement one or both of the following MLIR interfaces: +// `acc::MappableType` and/or `acc::PointerLikeType` +// +// These interfaces provide the necessary methods for the pass to: +// - Determine variable type categories (scalar vs. aggregate) +// - Generate appropriate bounds information +// - Generate privatization recipes +// +// 2. Operation Interface Implementation: Operations that access partial +// entities or create views should implement the following MLIR +// interfaces: `acc::PartialEntityAccess` and/or +// `mlir::ViewLikeOpInterface` +// +// These interfaces are used for proper data clause ordering, ensuring +// that base entities are mapped before derived entities (e.g., a +// struct is mapped before its fields, an array is mapped before +// subarray views). +// +// 3. Analysis Registration (Optional): If custom behavior is needed for +// variable name extraction or alias analysis, the dialect should +// pre-register the `acc::OpenACCSupport` and `mlir::AliasAnalysis` analyses. +// +// If not registered, default behavior will be used. +// +// Implementation Details: +// ----------------------- +// The pass performs the following operations: +// +// 1. Finds candidate variables which are live-in to the compute region and +// are not already in a data clause or private clause. +// +// 2. Generates both data "entry" and "exit" clause operations that match +// the intended action depending on variable type: +// - copy -> acc.copyin (entry) + acc.copyout (exit) +// - present -> acc.present (entry) + acc.delete (exit) +// - firstprivate -> acc.firstprivate (entry only, no exit) +// +// 3. Ensures that default clause is taken into consideration by looking +// through current construct and parent constructs to find the "visible +// default clause". +// +// 4. Fixes up SSA value links so that uses in the acc region reference the +// result of the newly created data clause operations. +// +// 5. When generating implicit data clause operations, it also adds variable +// name information and marks them with the implicit flag. +// +// 6. Recipes are generated by calling the appropriate entrypoints in the +// MappableType and PointerLikeType interfaces. +// +// 7. AliasAnalysis is used to determine if a variable is already covered by +// an existing data clause (e.g., an interior pointer covered by its parent). +// +// Examples: +// --------- +// +// Example 1: Scalar in parallel construct (implicit firstprivate) +// +// Before: +// func.func @test() { +// %scalar = memref.alloca() {acc.var_name = "x"} : memref +// acc.parallel { +// %val = memref.load %scalar[] : memref +// acc.yield +// } +// } +// +// After: +// func.func @test() { +// %scalar = memref.alloca() {acc.var_name = "x"} : memref +// %firstpriv = acc.firstprivate varPtr(%scalar : memref) +// -> memref {implicit = true, name = "x"} +// acc.parallel firstprivate(@recipe -> %firstpriv : memref) { +// %val = memref.load %firstpriv[] : memref +// acc.yield +// } +// } +// +// Example 2: Scalar in kernels construct (implicit copy) +// +// Before: +// func.func @test() { +// %scalar = memref.alloca() {acc.var_name = "n"} : memref +// acc.kernels { +// %val = memref.load %scalar[] : memref +// acc.terminator +// } +// } +// +// After: +// func.func @test() { +// %scalar = memref.alloca() {acc.var_name = "n"} : memref +// %copyin = acc.copyin varPtr(%scalar : memref) -> memref +// {dataClause = #acc, +// implicit = true, name = "n"} +// acc.kernels dataOperands(%copyin : memref) { +// %val = memref.load %copyin[] : memref +// acc.terminator +// } +// acc.copyout accPtr(%copyin : memref) +// to varPtr(%scalar : memref) +// {dataClause = #acc, +// implicit = true, name = "n"} +// } +// +// Example 3: Array (aggregate) in parallel (implicit copy) +// +// Before: +// func.func @test() { +// %array = memref.alloca() {acc.var_name = "arr"} : memref<100xf32> +// acc.parallel { +// %c0 = arith.constant 0 : index +// %val = memref.load %array[%c0] : memref<100xf32> +// acc.yield +// } +// } +// +// After: +// func.func @test() { +// %array = memref.alloca() {acc.var_name = "arr"} : memref<100xf32> +// %copyin = acc.copyin varPtr(%array : memref<100xf32>) +// -> memref<100xf32> +// {dataClause = #acc, +// implicit = true, name = "arr"} +// acc.parallel dataOperands(%copyin : memref<100xf32>) { +// %c0 = arith.constant 0 : index +// %val = memref.load %copyin[%c0] : memref<100xf32> +// acc.yield +// } +// acc.copyout accPtr(%copyin : memref<100xf32>) +// to varPtr(%array : memref<100xf32>) +// {dataClause = #acc, +// implicit = true, name = "arr"} +// } +// +// Example 4: Array with default(present) +// +// Before: +// func.func @test() { +// %array = memref.alloca() {acc.var_name = "arr"} : memref<100xf32> +// acc.parallel { +// %c0 = arith.constant 0 : index +// %val = memref.load %array[%c0] : memref<100xf32> +// acc.yield +// } attributes {defaultAttr = #acc} +// } +// +// After: +// func.func @test() { +// %array = memref.alloca() {acc.var_name = "arr"} : memref<100xf32> +// %present = acc.present varPtr(%array : memref<100xf32>) +// -> memref<100xf32> +// {implicit = true, name = "arr"} +// acc.parallel dataOperands(%present : memref<100xf32>) +// attributes {defaultAttr = #acc} { +// %c0 = arith.constant 0 : index +// %val = memref.load %present[%c0] : memref<100xf32> +// acc.yield +// } +// acc.delete accPtr(%present : memref<100xf32>) +// {dataClause = #acc, +// implicit = true, name = "arr"} +// } +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/OpenACC/Transforms/Passes.h" + +#include "mlir/Analysis/AliasAnalysis.h" +#include "mlir/Dialect/OpenACC/Analysis/OpenACCSupport.h" +#include "mlir/Dialect/OpenACC/OpenACC.h" +#include "mlir/Dialect/OpenACC/OpenACCUtils.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/Dominance.h" +#include "mlir/IR/Operation.h" +#include "mlir/IR/Value.h" +#include "mlir/Interfaces/FunctionInterfaces.h" +#include "mlir/Interfaces/ViewLikeInterface.h" +#include "mlir/Transforms/RegionUtils.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/TypeSwitch.h" +#include "llvm/Support/ErrorHandling.h" +#include + +namespace mlir { +namespace acc { +#define GEN_PASS_DEF_ACCIMPLICITDATA +#include "mlir/Dialect/OpenACC/Transforms/Passes.h.inc" +} // namespace acc +} // namespace mlir + +#define DEBUG_TYPE "acc-implicit-data" + +using namespace mlir; + +namespace { + +class ACCImplicitData : public acc::impl::ACCImplicitDataBase { +public: + using acc::impl::ACCImplicitDataBase::ACCImplicitDataBase; + + void runOnOperation() override; + +private: + /// Collects all data clauses that dominate the compute construct. + /// Needed to determine if a variable is already covered by an existing data + /// clause. + SmallVector getDominatingDataClauses(Operation *computeConstructOp); + + /// Looks through the `dominatingDataClauses` to find the original data clause + /// op for an alias. Returns nullptr if no original data clause op is found. + template + Operation *getOriginalDataClauseOpForAlias( + Value var, OpBuilder &builder, OpT computeConstructOp, + const SmallVector &dominatingDataClauses); + + /// Generates the appropriate `acc.copyin`, `acc.present`,`acc.firstprivate`, + /// etc. data clause op for a candidate variable. + template + Operation *generateDataClauseOpForCandidate( + Value var, ModuleOp &module, OpBuilder &builder, OpT computeConstructOp, + const SmallVector &dominatingDataClauses, + const std::optional &defaultClause); + + /// Generates the implicit data ops for a compute construct. + template + void generateImplicitDataOps( + ModuleOp &module, OpT computeConstructOp, + std::optional &defaultClause); + + /// Generates a private recipe for a variable. + acc::PrivateRecipeOp generatePrivateRecipe(ModuleOp &module, Value var, + Location loc, OpBuilder &builder, + acc::OpenACCSupport &accSupport); + + /// Generates a firstprivate recipe for a variable. + acc::FirstprivateRecipeOp + generateFirstprivateRecipe(ModuleOp &module, Value var, Location loc, + OpBuilder &builder, + acc::OpenACCSupport &accSupport); + + /// Generates recipes for a list of variables. + void generateRecipes(ModuleOp &module, OpBuilder &builder, + Operation *computeConstructOp, + const SmallVector &newOperands, + SmallVector &newRecipeSyms); +}; + +/// Determines if a variable is a candidate for implicit data mapping. +/// Returns true if the variable is a candidate, false otherwise. +static bool isCandidateForImplicitData(Value val, Region &accRegion) { + // Ensure the variable is an allowed type for data clause. + if (!acc::isPointerLikeType(val.getType()) && + !acc::isMappableType(val.getType())) + return false; + + // If this is already coming from a data clause, we do not need to generate + // another. + if (isa_and_nonnull(val.getDefiningOp())) + return false; + + // If this is only used by private clauses, it is not a real live-in. + if (acc::isOnlyUsedByPrivateClauses(val, accRegion)) + return false; + + return true; +} + +SmallVector +ACCImplicitData::getDominatingDataClauses(Operation *computeConstructOp) { + llvm::SmallSetVector dominatingDataClauses; + + llvm::TypeSwitch(computeConstructOp) + .Case([&](auto op) { + for (auto dataClause : op.getDataClauseOperands()) { + dominatingDataClauses.insert(dataClause); + } + }) + .Default([](Operation *) {}); + + // Collect the data clauses from enclosing data constructs. + Operation *currParentOp = computeConstructOp->getParentOp(); + while (currParentOp) { + if (isa(currParentOp)) { + for (auto dataClause : + dyn_cast(currParentOp).getDataClauseOperands()) { + dominatingDataClauses.insert(dataClause); + } + } + currParentOp = currParentOp->getParentOp(); + } + + // Find the enclosing function/subroutine + auto funcOp = computeConstructOp->getParentOfType(); + if (!funcOp) + return dominatingDataClauses.takeVector(); + + // Walk the function to find `acc.declare_enter`/`acc.declare_exit` pairs that + // dominate and post-dominate the compute construct and add their data + // clauses to the list. + auto &domInfo = this->getAnalysis(); + auto &postDomInfo = this->getAnalysis(); + funcOp->walk([&](acc::DeclareEnterOp declareEnterOp) { + if (domInfo.dominates(declareEnterOp.getOperation(), computeConstructOp)) { + // Collect all `acc.declare_exit` ops for this token. + SmallVector exits; + for (auto *user : declareEnterOp.getToken().getUsers()) + if (auto declareExit = dyn_cast(user)) + exits.push_back(declareExit); + + // Only add clauses if every `acc.declare_exit` op post-dominates the + // compute construct. + if (!exits.empty() && llvm::all_of(exits, [&](acc::DeclareExitOp exitOp) { + return postDomInfo.postDominates(exitOp, computeConstructOp); + })) { + for (auto dataClause : declareEnterOp.getDataClauseOperands()) + dominatingDataClauses.insert(dataClause); + } + } + }); + + return dominatingDataClauses.takeVector(); +} + +template +Operation *ACCImplicitData::getOriginalDataClauseOpForAlias( + Value var, OpBuilder &builder, OpT computeConstructOp, + const SmallVector &dominatingDataClauses) { + auto &aliasAnalysis = this->getAnalysis(); + for (auto dataClause : dominatingDataClauses) { + if (auto *dataClauseOp = dataClause.getDefiningOp()) { + // Only accept clauses that guarantee that the alias is present. + if (isa(dataClauseOp)) + if (aliasAnalysis.alias(acc::getVar(dataClauseOp), var).isMust()) + return dataClauseOp; + } + } + return nullptr; +} + +// Generates bounds for variables that have unknown dimensions +static void fillInBoundsForUnknownDimensions(Operation *dataClauseOp, + OpBuilder &builder) { + + if (!acc::getBounds(dataClauseOp).empty()) + // If bounds are already present, do not overwrite them. + return; + + // For types that have unknown dimensions, attempt to generate bounds by + // relying on MappableType being able to extract it from the IR. + auto var = acc::getVar(dataClauseOp); + auto type = var.getType(); + if (auto mappableTy = dyn_cast(type)) { + if (mappableTy.hasUnknownDimensions()) { + TypeSwitch(dataClauseOp) + .Case([&](auto dataClauseOp) { + if (std::is_same_v) + return; + OpBuilder::InsertionGuard guard(builder); + builder.setInsertionPoint(dataClauseOp); + auto bounds = mappableTy.generateAccBounds(var, builder); + if (!bounds.empty()) + dataClauseOp.getBoundsMutable().assign(bounds); + }); + } + } +} + +acc::PrivateRecipeOp +ACCImplicitData::generatePrivateRecipe(ModuleOp &module, Value var, + Location loc, OpBuilder &builder, + acc::OpenACCSupport &accSupport) { + auto type = var.getType(); + std::string recipeName = + accSupport.getRecipeName(acc::RecipeKind::private_recipe, type, var); + + // Check if recipe already exists + auto existingRecipe = module.lookupSymbol(recipeName); + if (existingRecipe) + return existingRecipe; + + // Set insertion point to module body in a scoped way + OpBuilder::InsertionGuard guard(builder); + builder.setInsertionPointToStart(module.getBody()); + + auto recipe = + acc::PrivateRecipeOp::createAndPopulate(builder, loc, recipeName, type); + if (!recipe.has_value()) + return accSupport.emitNYI(loc, "implicit private"), nullptr; + return recipe.value(); +} + +acc::FirstprivateRecipeOp +ACCImplicitData::generateFirstprivateRecipe(ModuleOp &module, Value var, + Location loc, OpBuilder &builder, + acc::OpenACCSupport &accSupport) { + auto type = var.getType(); + std::string recipeName = + accSupport.getRecipeName(acc::RecipeKind::firstprivate_recipe, type, var); + + // Check if recipe already exists + auto existingRecipe = + module.lookupSymbol(recipeName); + if (existingRecipe) + return existingRecipe; + + // Set insertion point to module body in a scoped way + OpBuilder::InsertionGuard guard(builder); + builder.setInsertionPointToStart(module.getBody()); + + auto recipe = acc::FirstprivateRecipeOp::createAndPopulate(builder, loc, + recipeName, type); + if (!recipe.has_value()) + return accSupport.emitNYI(loc, "implicit firstprivate"), nullptr; + return recipe.value(); +} + +void ACCImplicitData::generateRecipes(ModuleOp &module, OpBuilder &builder, + Operation *computeConstructOp, + const SmallVector &newOperands, + SmallVector &newRecipeSyms) { + auto &accSupport = this->getAnalysis(); + for (auto var : newOperands) { + auto loc{var.getLoc()}; + if (isa(var.getDefiningOp())) { + auto recipe = generatePrivateRecipe( + module, acc::getVar(var.getDefiningOp()), loc, builder, accSupport); + if (recipe) + newRecipeSyms.push_back(SymbolRefAttr::get(module->getContext(), + recipe.getSymName().str())); + } else if (isa(var.getDefiningOp())) { + auto recipe = generateFirstprivateRecipe( + module, acc::getVar(var.getDefiningOp()), loc, builder, accSupport); + if (recipe) + newRecipeSyms.push_back(SymbolRefAttr::get(module->getContext(), + recipe.getSymName().str())); + } else { + accSupport.emitNYI(var.getLoc(), "implicit reduction"); + } + } +} + +// Generates the data entry data op clause so that it adheres to OpenACC +// rules as follows (line numbers and specification from OpenACC 3.4): +// 1388 An aggregate variable will be treated as if it appears either: +// 1389 - In a present clause if there is a default(present) clause visible at +// the compute construct. +// 1391 - In a copy clause otherwise. +// 1392 A scalar variable will be treated as if it appears either: +// 1393 - In a copy clause if the compute construct is a kernels construct. +// 1394 - In a firstprivate clause otherwise. +template +Operation *ACCImplicitData::generateDataClauseOpForCandidate( + Value var, ModuleOp &module, OpBuilder &builder, OpT computeConstructOp, + const SmallVector &dominatingDataClauses, + const std::optional &defaultClause) { + auto &accSupport = this->getAnalysis(); + acc::VariableTypeCategory typeCategory = + acc::VariableTypeCategory::uncategorized; + if (auto mappableTy = dyn_cast(var.getType())) { + typeCategory = mappableTy.getTypeCategory(var); + } else if (auto pointerLikeTy = + dyn_cast(var.getType())) { + typeCategory = pointerLikeTy.getPointeeTypeCategory( + cast>(var), + pointerLikeTy.getElementType()); + } + + bool isScalar = + acc::bitEnumContainsAny(typeCategory, acc::VariableTypeCategory::scalar); + bool isAnyAggregate = acc::bitEnumContainsAny( + typeCategory, acc::VariableTypeCategory::aggregate); + Location loc = computeConstructOp->getLoc(); + + Operation *op = nullptr; + op = getOriginalDataClauseOpForAlias(var, builder, computeConstructOp, + dominatingDataClauses); + if (op) { + if (isa(op)) + return acc::NoCreateOp::create(builder, loc, var, + /*structured=*/true, /*implicit=*/true, + accSupport.getVariableName(var), + acc::getBounds(op)); + + if (isa(op)) + return acc::DevicePtrOp::create(builder, loc, var, + /*structured=*/true, /*implicit=*/true, + accSupport.getVariableName(var), + acc::getBounds(op)); + + // The original data clause op is a PresentOp, CopyinOp, or CreateOp, + // hence guaranteed to be present. + return acc::PresentOp::create(builder, loc, var, + /*structured=*/true, /*implicit=*/true, + accSupport.getVariableName(var), + acc::getBounds(op)); + } else if (isScalar) { + if (enableImplicitReductionCopy && + acc::isOnlyUsedByReductionClauses(var, + computeConstructOp->getRegion(0))) { + auto copyinOp = + acc::CopyinOp::create(builder, loc, var, + /*structured=*/true, /*implicit=*/true, + accSupport.getVariableName(var)); + copyinOp.setDataClause(acc::DataClause::acc_reduction); + return copyinOp.getOperation(); + } + if constexpr (std::is_same_v || + std::is_same_v) { + // Scalars are implicit copyin in kernels construct. + // We also do the same for acc.kernel_environment because semantics + // of user variable mappings should be applied while ACC construct exists + // and at this point we should only be dealing with unmapped variables + // that were made live-in by the compiler. + // TODO: This may be revisited. + auto copyinOp = + acc::CopyinOp::create(builder, loc, var, + /*structured=*/true, /*implicit=*/true, + accSupport.getVariableName(var)); + copyinOp.setDataClause(acc::DataClause::acc_copy); + return copyinOp.getOperation(); + } else { + // Scalars are implicit firstprivate in parallel and serial construct. + return acc::FirstprivateOp::create(builder, loc, var, + /*structured=*/true, /*implicit=*/true, + accSupport.getVariableName(var)); + } + } else if (isAnyAggregate) { + Operation *newDataOp = nullptr; + + // When default(present) is true, the implicit behavior is present. + if (defaultClause.has_value() && + defaultClause.value() == acc::ClauseDefaultValue::Present) { + newDataOp = acc::PresentOp::create(builder, loc, var, + /*structured=*/true, /*implicit=*/true, + accSupport.getVariableName(var)); + } else { + SmallVector bounds; + auto copyinOp = + acc::CopyinOp::create(builder, loc, var, + /*structured=*/true, /*implicit=*/true, + accSupport.getVariableName(var)); + copyinOp.setDataClause(acc::DataClause::acc_copy); + newDataOp = copyinOp.getOperation(); + } + + return newDataOp; + } else { + // This is not a fatal error - for example when the element type is + // pointer type (aka we have a pointer of pointer), it is potentially a + // deep copy scenario which is not being handled here. + // Other types need to be canonicalized. Thus just log unhandled cases. + LLVM_DEBUG(llvm::dbgs() + << "Unhandled case for implicit data mapping " << var << "\n"); + } + return nullptr; +} + +// Ensures that result values from the acc data clause ops are used inside the +// acc region. ie: +// acc.kernels { +// use %val +// } +// => +// %dev = acc.dataop %val +// acc.kernels { +// use %dev +// } +static void legalizeValuesInRegion(Region &accRegion, + SmallVector &newPrivateOperands, + SmallVector &newDataClauseOperands) { + for (Value dataClause : + llvm::concat(newDataClauseOperands, newPrivateOperands)) { + Value var = acc::getVar(dataClause.getDefiningOp()); + replaceAllUsesInRegionWith(var, dataClause, accRegion); + } +} + +// Adds the private operands and private recipes to the data construct +// operation in a valid way (ensures that the index in the privatizationRecipes +// array matches the position of the private operand). +template +static void +addNewPrivateOperands(OpT &accOp, const SmallVector &privateOperands, + const SmallVector &privateRecipeSyms) { + assert(privateOperands.size() == privateRecipeSyms.size()); + if (privateOperands.empty()) + return; + + SmallVector completePrivateRecipesSyms; + SmallVector completeFirstprivateRecipesSyms; + SmallVector newPrivateOperands; + SmallVector newFirstprivateOperands; + + // Collect all of the existing recipes since they are held in an attribute. + // To add to it, we need to create a brand new one. + if (accOp.getPrivatizationRecipes().has_value()) + for (auto privatization : accOp.getPrivatizationRecipesAttr()) + completePrivateRecipesSyms.push_back(privatization); + if (accOp.getFirstprivatizationRecipes().has_value()) + for (auto privatization : accOp.getFirstprivatizationRecipesAttr()) + completeFirstprivateRecipesSyms.push_back(privatization); + + // Now separate between private and firstprivate operands. + for (auto [priv, privateRecipeSym] : + llvm::zip(privateOperands, privateRecipeSyms)) { + if (isa(priv.getDefiningOp())) { + newPrivateOperands.push_back(priv); + completePrivateRecipesSyms.push_back(privateRecipeSym); + } else if (isa(priv.getDefiningOp())) { + newFirstprivateOperands.push_back(priv); + completeFirstprivateRecipesSyms.push_back(privateRecipeSym); + } else { + llvm_unreachable("unhandled private operand"); + } + } + + // Append all of the new private operands to their appropriate list. + accOp.getPrivateOperandsMutable().append(newPrivateOperands); + accOp.getFirstprivateOperandsMutable().append(newFirstprivateOperands); + + // Update the privatizationRecipes attributes to hold all of the new recipes. + if (!completePrivateRecipesSyms.empty()) + accOp.setPrivatizationRecipesAttr( + ArrayAttr::get(accOp.getContext(), completePrivateRecipesSyms)); + if (!completeFirstprivateRecipesSyms.empty()) + accOp.setFirstprivatizationRecipesAttr( + ArrayAttr::get(accOp.getContext(), completeFirstprivateRecipesSyms)); +} + +static Operation *findDataExitOp(Operation *dataEntryOp) { + auto res = acc::getAccVar(dataEntryOp); + for (auto *user : res.getUsers()) + if (isa(user)) + return user; + return nullptr; +} + +// Generates matching data exit operation as described in the acc dialect +// for how data clauses are decomposed: +// https://mlir.llvm.org/docs/Dialects/OpenACCDialect/#operation-categories +// Key ones used here: +// * acc {construct} copy -> acc.copyin (before region) + acc.copyout (after +// region) +// * acc {construct} present -> acc.present (before region) + acc.delete +// (after region) +static void +generateDataExitOperations(OpBuilder &builder, Operation *accOp, + const SmallVector &newDataClauseOperands, + const SmallVector &sortedDataClauseOperands) { + builder.setInsertionPointAfter(accOp); + Value lastDataClause = nullptr; + for (auto dataEntry : llvm::reverse(sortedDataClauseOperands)) { + if (llvm::find(newDataClauseOperands, dataEntry) == + newDataClauseOperands.end()) { + // If this is not a new data clause operand, we should not generate an + // exit operation for it. + lastDataClause = dataEntry; + continue; + } + if (lastDataClause) + if (auto *dataExitOp = findDataExitOp(lastDataClause.getDefiningOp())) + builder.setInsertionPointAfter(dataExitOp); + Operation *dataEntryOp = dataEntry.getDefiningOp(); + if (isa(dataEntryOp)) { + auto copyoutOp = acc::CopyoutOp::create( + builder, dataEntryOp->getLoc(), dataEntry, acc::getVar(dataEntryOp), + /*structured=*/true, /*implicit=*/true, + acc::getVarName(dataEntryOp).value(), acc::getBounds(dataEntryOp)); + copyoutOp.setDataClause(acc::DataClause::acc_copy); + } else if (isa(dataEntryOp)) { + auto deleteOp = acc::DeleteOp::create( + builder, dataEntryOp->getLoc(), dataEntry, + /*structured=*/true, /*implicit=*/true, + acc::getVarName(dataEntryOp).value(), acc::getBounds(dataEntryOp)); + deleteOp.setDataClause(acc::getDataClause(dataEntryOp).value()); + } else if (isa(dataEntryOp)) { + // Do nothing. + } else { + llvm_unreachable("unhandled data exit"); + } + lastDataClause = dataEntry; + } +} + +/// Returns all base references of a value in order. +/// So for example, if we have a reference to a struct field like +/// s.f1.f2.f3, this will return . +/// Any intermediate casts/view-like operations are included in the +/// chain as well. +static SmallVector getBaseRefsChain(Value val) { + SmallVector baseRefs; + baseRefs.push_back(val); + while (true) { + Value prevVal = val; + + val = acc::getBaseEntity(val); + if (val != baseRefs.front()) + baseRefs.insert(baseRefs.begin(), val); + + // If this is a view-like operation, it is effectively another + // view of the same entity so we should add it to the chain also. + if (auto viewLikeOp = val.getDefiningOp()) { + val = viewLikeOp.getViewSource(); + baseRefs.insert(baseRefs.begin(), val); + } + + // Continue loop if we made any progress + if (val == prevVal) + break; + } + + return baseRefs; +} + +static void insertInSortedOrder(SmallVector &sortedDataClauseOperands, + Operation *newClause) { + auto *insertPos = + std::find_if(sortedDataClauseOperands.begin(), + sortedDataClauseOperands.end(), [&](Value dataClauseVal) { + // Get the base refs for the current clause we are looking + // at. + auto var = acc::getVar(dataClauseVal.getDefiningOp()); + auto baseRefs = getBaseRefsChain(var); + + // If the newClause is of a base ref of an existing clause, + // we should insert it right before the current clause. + // Thus return true to stop iteration when this is the + // case. + return std::find(baseRefs.begin(), baseRefs.end(), + acc::getVar(newClause)) != baseRefs.end(); + }); + + if (insertPos != sortedDataClauseOperands.end()) { + newClause->moveBefore(insertPos->getDefiningOp()); + sortedDataClauseOperands.insert(insertPos, acc::getAccVar(newClause)); + } else { + sortedDataClauseOperands.push_back(acc::getAccVar(newClause)); + } +} + +template +void ACCImplicitData::generateImplicitDataOps( + ModuleOp &module, OpT computeConstructOp, + std::optional &defaultClause) { + // Implicit data attributes are only applied if "[t]here is no default(none) + // clause visible at the compute construct." + if (defaultClause.has_value() && + defaultClause.value() == acc::ClauseDefaultValue::None) + return; + assert(!defaultClause.has_value() || + defaultClause.value() == acc::ClauseDefaultValue::Present); + + // 1) Collect live-in values. + Region &accRegion = computeConstructOp->getRegion(0); + SetVector liveInValues; + getUsedValuesDefinedAbove(accRegion, liveInValues); + + // 2) Run the filtering to find relevant pointers that need copied. + auto isCandidate{[&](Value val) -> bool { + return isCandidateForImplicitData(val, accRegion); + }}; + auto candidateVars( + llvm::to_vector(llvm::make_filter_range(liveInValues, isCandidate))); + if (candidateVars.empty()) + return; + + // 3) Generate data clauses for the variables. + SmallVector newPrivateOperands; + SmallVector newDataClauseOperands; + OpBuilder builder(computeConstructOp); + if (!candidateVars.empty()) { + LLVM_DEBUG(llvm::dbgs() << "== Generating clauses for ==\n" + << computeConstructOp << "\n"); + } + auto dominatingDataClauses = getDominatingDataClauses(computeConstructOp); + for (auto var : candidateVars) { + auto newDataClauseOp = generateDataClauseOpForCandidate( + var, module, builder, computeConstructOp, dominatingDataClauses, + defaultClause); + fillInBoundsForUnknownDimensions(newDataClauseOp, builder); + LLVM_DEBUG(llvm::dbgs() << "Generated data clause for " << var << ":\n" + << "\t" << *newDataClauseOp << "\n"); + if (isa_and_nonnull( + newDataClauseOp)) { + newPrivateOperands.push_back(acc::getAccVar(newDataClauseOp)); + } else if (isa_and_nonnull(newDataClauseOp)) { + newDataClauseOperands.push_back(acc::getAccVar(newDataClauseOp)); + dominatingDataClauses.push_back(acc::getAccVar(newDataClauseOp)); + } + } + + // 4) Legalize values in region (aka the uses in the region are the result + // of the data clause ops) + legalizeValuesInRegion(accRegion, newPrivateOperands, newDataClauseOperands); + + SmallVector newPrivateRecipeSyms; + // 5) Generate private recipes which are required for properly attaching + // private operands. + if constexpr (!std::is_same_v && + !std::is_same_v) + generateRecipes(module, builder, computeConstructOp, newPrivateOperands, + newPrivateRecipeSyms); + + // 6) Figure out insertion order for the new data clause operands. + SmallVector sortedDataClauseOperands( + computeConstructOp.getDataClauseOperands()); + for (auto newClause : newDataClauseOperands) + insertInSortedOrder(sortedDataClauseOperands, newClause.getDefiningOp()); + + // 7) Generate the data exit operations. + generateDataExitOperations(builder, computeConstructOp, newDataClauseOperands, + sortedDataClauseOperands); + + // 8) Add all of the new operands to the compute construct op. + assert(newPrivateOperands.size() == newPrivateRecipeSyms.size() && + "sizes must match"); + if constexpr (!std::is_same_v && + !std::is_same_v) + addNewPrivateOperands(computeConstructOp, newPrivateOperands, + newPrivateRecipeSyms); + + computeConstructOp.getDataClauseOperandsMutable().assign( + sortedDataClauseOperands); +} + +void ACCImplicitData::runOnOperation() { + ModuleOp module = this->getOperation(); + module.walk([&](Operation *op) { + if (isa(op)) { + assert(op->getNumRegions() == 1 && "must have 1 region"); + + auto defaultClause = acc::getDefaultAttr(op); + llvm::TypeSwitch(op) + .Case( + [&](auto op) { + generateImplicitDataOps(module, op, defaultClause); + }) + .Default([&](Operation *) {}); + } + }); +} + +} // namespace diff --git a/mlir/lib/Dialect/OpenACC/Transforms/CMakeLists.txt b/mlir/lib/Dialect/OpenACC/Transforms/CMakeLists.txt index 7d934956089a5..f8fff5958f8c7 100644 --- a/mlir/lib/Dialect/OpenACC/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/OpenACC/Transforms/CMakeLists.txt @@ -1,4 +1,5 @@ add_mlir_dialect_library(MLIROpenACCTransforms + ACCImplicitData.cpp LegalizeDataValues.cpp ADDITIONAL_HEADER_DIRS @@ -14,7 +15,10 @@ add_mlir_dialect_library(MLIROpenACCTransforms MLIROpenACCTypeInterfacesIncGen LINK_LIBS PUBLIC + MLIRAnalysis + MLIROpenACCAnalysis MLIROpenACCDialect + MLIROpenACCUtils MLIRFuncDialect MLIRIR MLIRPass diff --git a/mlir/test/Dialect/OpenACC/acc-implicit-data-reduction.mlir b/mlir/test/Dialect/OpenACC/acc-implicit-data-reduction.mlir new file mode 100644 index 0000000000000..cff118b79aec2 --- /dev/null +++ b/mlir/test/Dialect/OpenACC/acc-implicit-data-reduction.mlir @@ -0,0 +1,109 @@ +// RUN: mlir-opt %s -acc-implicit-data=enable-implicit-reduction-copy=true -split-input-file | FileCheck %s --check-prefix=COPY +// RUN: mlir-opt %s -acc-implicit-data=enable-implicit-reduction-copy=false -split-input-file | FileCheck %s --check-prefix=FIRSTPRIVATE + +// Test case: scalar reduction variable in parallel loop +// When enable-implicit-reduction-copy=true: expect copyin/copyout for reduction variable +// When enable-implicit-reduction-copy=false: expect firstprivate for reduction variable + +acc.reduction.recipe @reduction_add_memref_i32 : memref reduction_operator init { +^bb0(%arg0: memref): + %c0_i32 = arith.constant 0 : i32 + %alloc = memref.alloca() : memref + memref.store %c0_i32, %alloc[] : memref + acc.yield %alloc : memref +} combiner { +^bb0(%arg0: memref, %arg1: memref): + %0 = memref.load %arg0[] : memref + %1 = memref.load %arg1[] : memref + %2 = arith.addi %0, %1 : i32 + memref.store %2, %arg0[] : memref + acc.yield %arg0 : memref +} + +func.func @test_reduction_implicit_copy() { + %c1_i32 = arith.constant 1 : i32 + %c100_i32 = arith.constant 100 : i32 + %c0_i32 = arith.constant 0 : i32 + %r = memref.alloca() : memref + memref.store %c0_i32, %r[] : memref + + acc.parallel { + %red_var = acc.reduction varPtr(%r : memref) -> memref {name = "r"} + acc.loop reduction(@reduction_add_memref_i32 -> %red_var : memref) control(%iv : i32) = (%c1_i32 : i32) to (%c100_i32 : i32) step (%c1_i32 : i32) { + %load = memref.load %red_var[] : memref + %add = arith.addi %load, %c1_i32 : i32 + memref.store %add, %red_var[] : memref + acc.yield + } attributes {independent = [#acc.device_type]} + acc.yield + } + return +} + +// When enable-implicit-reduction-copy=true: expect copyin/copyout for reduction variable +// COPY-LABEL: func.func @test_reduction_implicit_copy +// COPY: %[[COPYIN:.*]] = acc.copyin varPtr({{.*}} : memref) -> memref {dataClause = #acc, implicit = true, name = ""} +// COPY: acc.copyout accPtr(%[[COPYIN]] : memref) to varPtr({{.*}} : memref) {dataClause = #acc, implicit = true, name = ""} + +// When enable-implicit-reduction-copy=false: expect firstprivate for reduction variable +// FIRSTPRIVATE-LABEL: func.func @test_reduction_implicit_copy +// FIRSTPRIVATE: acc.firstprivate varPtr({{.*}} : memref) -> memref {implicit = true, name = ""} +// FIRSTPRIVATE-NOT: acc.copyin +// FIRSTPRIVATE-NOT: acc.copyout + +// ----- + +// Test case: reduction variable used both in loop and outside +// Should be firstprivate regardless of the flag setting + +acc.reduction.recipe @reduction_add_memref_i32_2 : memref reduction_operator init { +^bb0(%arg0: memref): + %c0_i32 = arith.constant 0 : i32 + %alloc = memref.alloca() : memref + memref.store %c0_i32, %alloc[] : memref + acc.yield %alloc : memref +} combiner { +^bb0(%arg0: memref, %arg1: memref): + %0 = memref.load %arg0[] : memref + %1 = memref.load %arg1[] : memref + %2 = arith.addi %0, %1 : i32 + memref.store %2, %arg0[] : memref + acc.yield %arg0 : memref +} + +func.func @test_reduction_with_usage_outside_loop() { + %c1_i32 = arith.constant 1 : i32 + %c100_i32 = arith.constant 100 : i32 + %c0_i32 = arith.constant 0 : i32 + %r = memref.alloca() : memref + %out = memref.alloca() : memref + memref.store %c0_i32, %r[] : memref + + %out_create = acc.create varPtr(%out : memref) -> memref {dataClause = #acc, name = "out"} + acc.parallel dataOperands(%out_create : memref) { + %red_var = acc.reduction varPtr(%r : memref) -> memref {name = "r"} + acc.loop reduction(@reduction_add_memref_i32_2 -> %red_var : memref) control(%iv : i32) = (%c1_i32 : i32) to (%c100_i32 : i32) step (%c1_i32 : i32) { + %load = memref.load %red_var[] : memref + %add = arith.addi %load, %c1_i32 : i32 + memref.store %add, %red_var[] : memref + acc.yield + } attributes {independent = [#acc.device_type]} + // out = r (usage of r outside the loop) + %final_r = memref.load %r[] : memref + memref.store %final_r, %out_create[] : memref + acc.yield + } + acc.copyout accPtr(%out_create : memref) to varPtr(%out : memref) {dataClause = #acc, name = "out"} + return +} + +// In this case, r should be firstprivate regardless of the flag setting +// because it's used outside the reduction context +// COPY-LABEL: func.func @test_reduction_with_usage_outside_loop +// COPY: acc.firstprivate varPtr({{.*}} : memref) -> memref {implicit = true, name = ""} +// COPY-NOT: acc.copyin varPtr({{.*}} : memref) -> memref {{.*}} name = "" + +// FIRSTPRIVATE-LABEL: func.func @test_reduction_with_usage_outside_loop +// FIRSTPRIVATE: acc.firstprivate varPtr({{.*}} : memref) -> memref {implicit = true, name = ""} +// FIRSTPRIVATE-NOT: acc.copyin varPtr({{.*}} : memref) -> memref {{.*}} name = "" + diff --git a/mlir/test/Dialect/OpenACC/acc-implicit-data.mlir b/mlir/test/Dialect/OpenACC/acc-implicit-data.mlir new file mode 100644 index 0000000000000..cf09c33ca5197 --- /dev/null +++ b/mlir/test/Dialect/OpenACC/acc-implicit-data.mlir @@ -0,0 +1,224 @@ +// RUN: mlir-opt %s -acc-implicit-data -split-input-file | FileCheck %s + +// ----- + +// Test scalar in serial construct - should generate firstprivate +func.func @test_scalar_in_serial() { + %alloc = memref.alloca() : memref + acc.serial { + %load = memref.load %alloc[] : memref + acc.yield + } + return +} + +// CHECK-LABEL: func.func @test_scalar_in_serial +// CHECK: acc.firstprivate varPtr({{.*}} : memref) -> memref {implicit = true, name = ""} + +// ----- + +// Test scalar in parallel construct - should generate firstprivate +func.func @test_scalar_in_parallel() { + %alloc = memref.alloca() : memref + acc.parallel { + %load = memref.load %alloc[] : memref + acc.yield + } + return +} + +// CHECK-LABEL: func.func @test_scalar_in_parallel +// CHECK: acc.firstprivate varPtr({{.*}} : memref) -> memref {implicit = true, name = ""} + +// ----- + +// Test scalar in kernels construct - should generate copyin/copyout +func.func @test_scalar_in_kernels() { + %alloc = memref.alloca() : memref + acc.kernels { + %load = memref.load %alloc[] : memref + acc.terminator + } + return +} + +// CHECK-LABEL: func.func @test_scalar_in_kernels +// CHECK: %[[COPYIN:.*]] = acc.copyin varPtr({{.*}} : memref) -> memref {dataClause = #acc, implicit = true, name = ""} +// CHECK: acc.copyout accPtr(%[[COPYIN]] : memref) to varPtr({{.*}} : memref) {dataClause = #acc, implicit = true, name = ""} + +// ----- + +// Test scalar in parallel with default(none) - should NOT generate implicit data +func.func @test_scalar_parallel_defaultnone() { + %alloc = memref.alloca() : memref + acc.parallel { + %load = memref.load %alloc[] : memref + acc.yield + } attributes {defaultAttr = #acc} + return +} + +// CHECK-LABEL: func.func @test_scalar_parallel_defaultnone +// CHECK-NOT: acc.firstprivate +// CHECK-NOT: acc.copyin + +// ----- + +// Test array in parallel - should generate copyin/copyout +func.func @test_array_in_parallel() { + %alloc = memref.alloca() : memref<10xf32> + acc.parallel { + %c0 = arith.constant 0 : index + %load = memref.load %alloc[%c0] : memref<10xf32> + acc.yield + } + return +} + +// CHECK-LABEL: func.func @test_array_in_parallel +// CHECK: %[[COPYIN:.*]] = acc.copyin varPtr({{.*}} : memref<10xf32>) -> memref<10xf32> {dataClause = #acc, implicit = true, name = ""} +// CHECK: acc.copyout accPtr(%[[COPYIN]] : memref<10xf32>) to varPtr({{.*}} : memref<10xf32>) {dataClause = #acc, implicit = true, name = ""} + +// ----- + +// Test array in kernels - should generate copyin/copyout +func.func @test_array_in_kernels() { + %alloc = memref.alloca() : memref<20xi32> + acc.kernels { + %c0 = arith.constant 0 : index + %load = memref.load %alloc[%c0] : memref<20xi32> + acc.terminator + } + return +} + +// CHECK-LABEL: func.func @test_array_in_kernels +// CHECK: %[[COPYIN:.*]] = acc.copyin varPtr({{.*}} : memref<20xi32>) -> memref<20xi32> {dataClause = #acc, implicit = true, name = ""} +// CHECK: acc.copyout accPtr(%[[COPYIN]] : memref<20xi32>) to varPtr({{.*}} : memref<20xi32>) {dataClause = #acc, implicit = true, name = ""} + +// ----- + +// Test array with default(present) - should generate present +func.func @test_array_parallel_defaultpresent() { + %alloc = memref.alloca() : memref<10xf32> + acc.parallel { + %c0 = arith.constant 0 : index + %load = memref.load %alloc[%c0] : memref<10xf32> + acc.yield + } attributes {defaultAttr = #acc} + return +} + +// CHECK-LABEL: func.func @test_array_parallel_defaultpresent +// CHECK: %[[PRESENT:.*]] = acc.present varPtr({{.*}} : memref<10xf32>) -> memref<10xf32> {implicit = true, name = ""} +// CHECK: acc.delete accPtr(%[[PRESENT]] : memref<10xf32>) {dataClause = #acc, implicit = true, name = ""} + +// ----- + +// Test scalar with default(present) - should still generate firstprivate (scalars ignore default(present)) +func.func @test_scalar_parallel_defaultpresent() { + %alloc = memref.alloca() : memref + acc.parallel { + %load = memref.load %alloc[] : memref + acc.yield + } attributes {defaultAttr = #acc} + return +} + +// CHECK-LABEL: func.func @test_scalar_parallel_defaultpresent +// CHECK: acc.firstprivate varPtr({{.*}} : memref) -> memref {implicit = true, name = ""} + +// ----- + +// Test multidimensional array +func.func @test_multidim_array_in_parallel() { + %alloc = memref.alloca() : memref<8x16xf32> + acc.parallel { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %load = memref.load %alloc[%c0, %c1] : memref<8x16xf32> + acc.yield + } + return +} + +// CHECK-LABEL: func.func @test_multidim_array_in_parallel +// CHECK: %[[COPYIN:.*]] = acc.copyin varPtr({{.*}} : memref<8x16xf32>) -> memref<8x16xf32> {dataClause = #acc, implicit = true, name = ""} +// CHECK: acc.copyout accPtr(%[[COPYIN]] : memref<8x16xf32>) to varPtr({{.*}} : memref<8x16xf32>) {dataClause = #acc, implicit = true, name = ""} + +// ----- + +// Test dynamic size array +func.func @test_dynamic_array(%size: index) { + %alloc = memref.alloca(%size) : memref + acc.parallel { + %c0 = arith.constant 0 : index + %load = memref.load %alloc[%c0] : memref + acc.yield + } + return +} + +// CHECK-LABEL: func.func @test_dynamic_array +// CHECK: %[[COPYIN:.*]] = acc.copyin varPtr({{.*}} : memref) -> memref {dataClause = #acc, implicit = true, name = ""} +// CHECK: acc.copyout accPtr(%[[COPYIN]] : memref) to varPtr({{.*}} : memref) {dataClause = #acc, implicit = true, name = ""} + +// ----- + +// Test variable with explicit data clause - implicit should recognize it +func.func @test_with_explicit_copyin() { + %alloc = memref.alloca() : memref<100xf32> + %copyin = acc.copyin varPtr(%alloc : memref<100xf32>) -> memref<100xf32> {name = "explicit"} + acc.parallel dataOperands(%copyin : memref<100xf32>) { + %c0 = arith.constant 0 : index + %load = memref.load %alloc[%c0] : memref<100xf32> + acc.yield + } + acc.copyout accPtr(%copyin : memref<100xf32>) to varPtr(%alloc : memref<100xf32>) {name = "explicit"} + return +} + +// CHECK-LABEL: func.func @test_with_explicit_copyin +// CHECK: acc.present varPtr({{.*}} : memref<100xf32>) -> memref<100xf32> {implicit = true, name = ""} + +// ----- + +// Test multiple variables +func.func @test_multiple_variables() { + %alloc1 = memref.alloca() : memref + %alloc2 = memref.alloca() : memref<10xi32> + acc.parallel { + %load1 = memref.load %alloc1[] : memref + %c0 = arith.constant 0 : index + %load2 = memref.load %alloc2[%c0] : memref<10xi32> + acc.yield + } + return +} + +// CHECK-LABEL: func.func @test_multiple_variables +// CHECK: acc.firstprivate varPtr({{.*}} : memref) -> memref {implicit = true, name = ""} +// CHECK: %[[COPYIN:.*]] = acc.copyin varPtr({{.*}} : memref<10xi32>) -> memref<10xi32> {dataClause = #acc, implicit = true, name = ""} +// CHECK: acc.copyout accPtr(%[[COPYIN]] : memref<10xi32>) to varPtr({{.*}} : memref<10xi32>) {dataClause = #acc, implicit = true, name = ""} + +// ----- + +// Test memref.view aliasing - view of explicitly copied buffer should generate present +func.func @test_memref_view(%size: index) { + %c0 = arith.constant 0 : index + %buffer = memref.alloca(%size) : memref + %copyin = acc.copyin varPtr(%buffer : memref) -> memref {name = "buffer"} + %view = memref.view %buffer[%c0][] : memref to memref<8x64xf32> + acc.kernels dataOperands(%copyin : memref) { + %c0_0 = arith.constant 0 : index + %c0_1 = arith.constant 0 : index + %load = memref.load %view[%c0_0, %c0_1] : memref<8x64xf32> + acc.terminator + } + acc.copyout accPtr(%copyin : memref) to varPtr(%buffer : memref) {name = "buffer"} + return +} + +// CHECK-LABEL: func.func @test_memref_view +// CHECK: acc.present varPtr({{.*}} : memref<8x64xf32>) -> memref<8x64xf32> {implicit = true, name = ""} +