-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[OpenACC][CIR] Impl reduction recipe pointer/array bound lowering #161726
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Just like with private, the lowering for these bounds are all pretty trivial. This patch enables them for reduction, which has everything in common except the init pattern, but that is handled/managed by Sema. This also adds sufficient testing to spot-check the allocation/initialization/destruction/etc.
@llvm/pr-subscribers-clangir Author: Erich Keane (erichkeane) ChangesJust like with private, the lowering for these bounds are all pretty trivial. This patch enables them for reduction, which has everything in common except the init pattern, but that is handled/managed by Sema. This also adds sufficient testing to spot-check the allocation/initialization/destruction/etc. Patch is 722.93 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/161726.diff 23 Files Affected:
diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp
index ea6ea2c63acc3..bbc45e5f92ca5 100644
--- a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp
@@ -427,22 +427,20 @@ void OpenACCRecipeBuilderBase::makeBoundsInit(
cgf.emitAutoVarInit(tempDeclEmission);
}
-// TODO: OpenACC: When we get this implemented for the reduction/firstprivate,
-// this might end up re-merging with createRecipeInitCopy. For now, keep it
-// separate until we're sure what everything looks like to keep this as clean
-// as possible.
-void OpenACCRecipeBuilderBase::createPrivateInitRecipe(
+// TODO: OpenACC: when we start doing firstprivate for array/vlas/etc, we
+// probably need to do a little work about the 'init' calls to put it in 'copy'
+// region instead.
+void OpenACCRecipeBuilderBase::createInitRecipe(
mlir::Location loc, mlir::Location locEnd, SourceRange exprRange,
- mlir::Value mainOp, mlir::acc::PrivateRecipeOp recipe, size_t numBounds,
+ mlir::Value mainOp, mlir::Region &recipeInitRegion, size_t numBounds,
llvm::ArrayRef<QualType> boundTypes, const VarDecl *allocaDecl,
QualType origType) {
assert(allocaDecl && "Required recipe variable not set?");
CIRGenFunction::DeclMapRevertingRAII declMapRAII{cgf, allocaDecl};
- mlir::Block *block =
- createRecipeBlock(recipe.getInitRegion(), mainOp.getType(), loc,
- numBounds, /*isInit=*/true);
- builder.setInsertionPointToEnd(&recipe.getInitRegion().back());
+ mlir::Block *block = createRecipeBlock(recipeInitRegion, mainOp.getType(),
+ loc, numBounds, /*isInit=*/true);
+ builder.setInsertionPointToEnd(&recipeInitRegion.back());
CIRGenFunction::LexicalScope ls(cgf, loc, block);
const Type *allocaPointeeType =
@@ -458,7 +456,7 @@ void OpenACCRecipeBuilderBase::createPrivateInitRecipe(
// Sema::TentativeAnalysisScopes in SemaOpenACC::CreateInitRecipe, it'll
// emit an error to tell us. However, emitting those errors during
// production is a violation of the standard, so we cannot do them.
- cgf.cgm.errorNYI(exprRange, "private default-init recipe");
+ cgf.cgm.errorNYI(exprRange, "private/reduction default-init recipe");
}
if (!numBounds) {
@@ -469,7 +467,7 @@ void OpenACCRecipeBuilderBase::createPrivateInitRecipe(
cgf.emitAutoVarInit(tempDeclEmission);
} else {
mlir::Value alloca = makeBoundsAlloca(
- block, exprRange, loc, "openacc.private.init", numBounds, boundTypes);
+ block, exprRange, loc, allocaDecl->getName(), numBounds, boundTypes);
// If the initializer is trivial, there is nothing to do here, so save
// ourselves some effort.
@@ -521,10 +519,10 @@ void OpenACCRecipeBuilderBase::createFirstprivateRecipeCopy(
// doesn't restore it aftewards.
void OpenACCRecipeBuilderBase::createReductionRecipeCombiner(
mlir::Location loc, mlir::Location locEnd, mlir::Value mainOp,
- mlir::acc::ReductionRecipeOp recipe) {
- mlir::Block *block = builder.createBlock(
- &recipe.getCombinerRegion(), recipe.getCombinerRegion().end(),
- {mainOp.getType(), mainOp.getType()}, {loc, loc});
+ mlir::acc::ReductionRecipeOp recipe, size_t numBounds) {
+ mlir::Block *block =
+ createRecipeBlock(recipe.getCombinerRegion(), mainOp.getType(), loc,
+ numBounds, /*isInit=*/false);
builder.setInsertionPointToEnd(&recipe.getCombinerRegion().back());
CIRGenFunction::LexicalScope ls(cgf, loc, block);
diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h
index a05b0bdaf6774..21707ad137f77 100644
--- a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h
+++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h
@@ -64,13 +64,13 @@ class OpenACCRecipeBuilderBase {
// doesn't restore it aftewards.
void createReductionRecipeCombiner(mlir::Location loc, mlir::Location locEnd,
mlir::Value mainOp,
- mlir::acc::ReductionRecipeOp recipe);
- void createPrivateInitRecipe(mlir::Location loc, mlir::Location locEnd,
- SourceRange exprRange, mlir::Value mainOp,
- mlir::acc::PrivateRecipeOp recipe,
- size_t numBounds,
- llvm::ArrayRef<QualType> boundTypes,
- const VarDecl *allocaDecl, QualType origType);
+ mlir::acc::ReductionRecipeOp recipe,
+ size_t numBounds);
+ void createInitRecipe(mlir::Location loc, mlir::Location locEnd,
+ SourceRange exprRange, mlir::Value mainOp,
+ mlir::Region &recipeInitRegion, size_t numBounds,
+ llvm::ArrayRef<QualType> boundTypes,
+ const VarDecl *allocaDecl, QualType origType);
void createRecipeDestroySection(mlir::Location loc, mlir::Location locEnd,
mlir::Value mainOp, CharUnits alignment,
@@ -224,10 +224,10 @@ class OpenACCRecipeBuilder : OpenACCRecipeBuilderBase {
// TODO: OpenACC: This is a bit of a hackery to get this to not change for
// the non-private recipes. This will be removed soon, when we get this
// 'right' for firstprivate and reduction.
- if constexpr (!std::is_same_v<RecipeTy, mlir::acc::PrivateRecipeOp>) {
+ if constexpr (std::is_same_v<RecipeTy, mlir::acc::FirstprivateRecipeOp>) {
if (numBounds) {
cgf.cgm.errorNYI(varRef->getSourceRange(),
- "firstprivate/reduction-init with bounds");
+ "firstprivate-init with bounds");
}
boundTypes = {};
numBounds = 0;
@@ -260,18 +260,25 @@ class OpenACCRecipeBuilder : OpenACCRecipeBuilderBase {
insertLocation = modBuilder.saveInsertionPoint();
if constexpr (std::is_same_v<RecipeTy, mlir::acc::PrivateRecipeOp>) {
- createPrivateInitRecipe(loc, locEnd, varRef->getSourceRange(), mainOp,
- recipe, numBounds, boundTypes, varRecipe,
- origType);
+ createInitRecipe(loc, locEnd, varRef->getSourceRange(), mainOp,
+ recipe.getInitRegion(), numBounds, boundTypes, varRecipe,
+ origType);
+ } else if constexpr (std::is_same_v<RecipeTy,
+ mlir::acc::ReductionRecipeOp>) {
+ createInitRecipe(loc, locEnd, varRef->getSourceRange(), mainOp,
+ recipe.getInitRegion(), numBounds, boundTypes, varRecipe,
+ origType);
+ createReductionRecipeCombiner(loc, locEnd, mainOp, recipe, numBounds);
} else {
+ static_assert(std::is_same_v<RecipeTy, mlir::acc::FirstprivateRecipeOp>);
+ // TODO: OpenACC: we probably want this to call createInitRecipe as well,
+ // but do so in a way that omits the 'initialization', so that we can do
+ // it separately, since it belongs in the 'copy' region. It also might
+ // need a way of getting the tempDeclEmission out of it for that purpose.
createRecipeInitCopy(loc, locEnd, varRef->getSourceRange(), mainOp,
recipe, varRecipe, temporary);
}
- if constexpr (std::is_same_v<RecipeTy, mlir::acc::ReductionRecipeOp>) {
- createReductionRecipeCombiner(loc, locEnd, mainOp, recipe);
- }
-
if (origType.isDestructedType())
createRecipeDestroySection(
loc, locEnd, mainOp, cgf.getContext().getDeclAlign(varRecipe),
diff --git a/clang/lib/Sema/SemaOpenACC.cpp b/clang/lib/Sema/SemaOpenACC.cpp
index 9aaf7f463403d..43d8eb4163a27 100644
--- a/clang/lib/Sema/SemaOpenACC.cpp
+++ b/clang/lib/Sema/SemaOpenACC.cpp
@@ -2894,15 +2894,19 @@ SemaOpenACC::CreateFirstPrivateInitRecipe(const Expr *VarExpr) {
OpenACCReductionRecipe SemaOpenACC::CreateReductionInitRecipe(
OpenACCReductionOperator ReductionOperator, const Expr *VarExpr) {
- // TODO: OpenACC: This shouldn't be necessary, see PrivateInitRecipe
- VarExpr = StripOffBounds(VarExpr);
-
+ // We don't strip bounds here, so that we are doing our recipe init at the
+ // 'lowest' possible level. Codegen is going to have to do its own 'looping'.
if (!VarExpr || VarExpr->getType()->isDependentType())
return OpenACCReductionRecipe::Empty();
QualType VarTy =
VarExpr->getType().getNonReferenceType().getUnqualifiedType();
+ // Array sections are special, and we have to treat them that way.
+ if (const auto *ASE =
+ dyn_cast<ArraySectionExpr>(VarExpr->IgnoreParenImpCasts()))
+ VarTy = ArraySectionExpr::getBaseOriginalType(ASE);
+
// TODO: OpenACC: for arrays/bounds versions, we're going to have to do a
// different initializer, but for now we can go ahead with this.
diff --git a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-default-ops.cpp b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-default-ops.cpp
index 3d295d58d1026..36d8c5ed4d7c5 100644
--- a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-default-ops.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-default-ops.cpp
@@ -1,4 +1,4 @@
-// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
struct DefaultOperators {
int i;
@@ -944,22 +944,436 @@ void acc_combined() {
for(int i=0;i < 5; ++i);
#pragma acc parallel loop reduction(+:someVarArr[2])
+// CHECK-NEXT: acc.reduction.recipe @reduction_add__Bcnt1__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <add> init {
+// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init"]
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_DefaultOperators>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_U]] : !u32i, !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.float
+// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double>
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.double
+// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double>
+// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #false
+// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: acc.yield
+// CHECK-NEXT: } combiner {
+// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>
+// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
#pragma acc parallel loop reduction(*:someVarArr[2])
+// CHECK-NEXT: acc.reduction.recipe @reduction_mul__Bcnt1__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <mul> init {
+// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init"]
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_DefaultOperators>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[ONE]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[ONE]], %[[GET_U]] : !u32i, !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.fp<1{{.*}}> : !cir.float
+// CHECK-NEXT: cir.store {{.*}} %[[ONE]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double>
+// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.fp<1{{.*}}> : !cir.double
+// CHECK-NEXT: cir.store {{.*}} %[[ONE]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double>
+// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[ONE:.*]] = cir.const #true
+// CHECK-NEXT: cir.store {{.*}} %[[ONE]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: acc.yield
+// CHECK-NEXT: } combiner {
+// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>
+// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
#pragma acc parallel loop reduction(max:someVarArr[2])
+// CHECK-NEXT: acc.reduction.recipe @reduction_max__Bcnt1__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <max> init {
+// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init"]
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_DefaultOperators>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LEAST:.*]] = cir.const #cir.int<-2147483648> : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[LEAST]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[LEAST:.*]] = cir.const #cir.int<0> : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[LEAST]], %[[GET_U]] : !u32i, !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[LEAST:.*]] = cir.const #cir.fp<-3.4{{.*}}E+38> : !cir.float
+// CHECK-NEXT: cir.store {{.*}} %[[LEAST]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double>
+// CHECK-NEXT: %[[LEAST:.*]] = cir.const #cir.fp<-1.7{{.*}}E+308> : !cir.double
+// CHECK-NEXT: cir.store {{.*}} %[[LEAST]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double>
+// CHECK-NEXT: %[[GET_B:.*]] = cir.get_mem...
[truncated]
|
@llvm/pr-subscribers-clang Author: Erich Keane (erichkeane) ChangesJust like with private, the lowering for these bounds are all pretty trivial. This patch enables them for reduction, which has everything in common except the init pattern, but that is handled/managed by Sema. This also adds sufficient testing to spot-check the allocation/initialization/destruction/etc. Patch is 722.93 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/161726.diff 23 Files Affected:
diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp
index ea6ea2c63acc3..bbc45e5f92ca5 100644
--- a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.cpp
@@ -427,22 +427,20 @@ void OpenACCRecipeBuilderBase::makeBoundsInit(
cgf.emitAutoVarInit(tempDeclEmission);
}
-// TODO: OpenACC: When we get this implemented for the reduction/firstprivate,
-// this might end up re-merging with createRecipeInitCopy. For now, keep it
-// separate until we're sure what everything looks like to keep this as clean
-// as possible.
-void OpenACCRecipeBuilderBase::createPrivateInitRecipe(
+// TODO: OpenACC: when we start doing firstprivate for array/vlas/etc, we
+// probably need to do a little work about the 'init' calls to put it in 'copy'
+// region instead.
+void OpenACCRecipeBuilderBase::createInitRecipe(
mlir::Location loc, mlir::Location locEnd, SourceRange exprRange,
- mlir::Value mainOp, mlir::acc::PrivateRecipeOp recipe, size_t numBounds,
+ mlir::Value mainOp, mlir::Region &recipeInitRegion, size_t numBounds,
llvm::ArrayRef<QualType> boundTypes, const VarDecl *allocaDecl,
QualType origType) {
assert(allocaDecl && "Required recipe variable not set?");
CIRGenFunction::DeclMapRevertingRAII declMapRAII{cgf, allocaDecl};
- mlir::Block *block =
- createRecipeBlock(recipe.getInitRegion(), mainOp.getType(), loc,
- numBounds, /*isInit=*/true);
- builder.setInsertionPointToEnd(&recipe.getInitRegion().back());
+ mlir::Block *block = createRecipeBlock(recipeInitRegion, mainOp.getType(),
+ loc, numBounds, /*isInit=*/true);
+ builder.setInsertionPointToEnd(&recipeInitRegion.back());
CIRGenFunction::LexicalScope ls(cgf, loc, block);
const Type *allocaPointeeType =
@@ -458,7 +456,7 @@ void OpenACCRecipeBuilderBase::createPrivateInitRecipe(
// Sema::TentativeAnalysisScopes in SemaOpenACC::CreateInitRecipe, it'll
// emit an error to tell us. However, emitting those errors during
// production is a violation of the standard, so we cannot do them.
- cgf.cgm.errorNYI(exprRange, "private default-init recipe");
+ cgf.cgm.errorNYI(exprRange, "private/reduction default-init recipe");
}
if (!numBounds) {
@@ -469,7 +467,7 @@ void OpenACCRecipeBuilderBase::createPrivateInitRecipe(
cgf.emitAutoVarInit(tempDeclEmission);
} else {
mlir::Value alloca = makeBoundsAlloca(
- block, exprRange, loc, "openacc.private.init", numBounds, boundTypes);
+ block, exprRange, loc, allocaDecl->getName(), numBounds, boundTypes);
// If the initializer is trivial, there is nothing to do here, so save
// ourselves some effort.
@@ -521,10 +519,10 @@ void OpenACCRecipeBuilderBase::createFirstprivateRecipeCopy(
// doesn't restore it aftewards.
void OpenACCRecipeBuilderBase::createReductionRecipeCombiner(
mlir::Location loc, mlir::Location locEnd, mlir::Value mainOp,
- mlir::acc::ReductionRecipeOp recipe) {
- mlir::Block *block = builder.createBlock(
- &recipe.getCombinerRegion(), recipe.getCombinerRegion().end(),
- {mainOp.getType(), mainOp.getType()}, {loc, loc});
+ mlir::acc::ReductionRecipeOp recipe, size_t numBounds) {
+ mlir::Block *block =
+ createRecipeBlock(recipe.getCombinerRegion(), mainOp.getType(), loc,
+ numBounds, /*isInit=*/false);
builder.setInsertionPointToEnd(&recipe.getCombinerRegion().back());
CIRGenFunction::LexicalScope ls(cgf, loc, block);
diff --git a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h
index a05b0bdaf6774..21707ad137f77 100644
--- a/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h
+++ b/clang/lib/CIR/CodeGen/CIRGenOpenACCRecipe.h
@@ -64,13 +64,13 @@ class OpenACCRecipeBuilderBase {
// doesn't restore it aftewards.
void createReductionRecipeCombiner(mlir::Location loc, mlir::Location locEnd,
mlir::Value mainOp,
- mlir::acc::ReductionRecipeOp recipe);
- void createPrivateInitRecipe(mlir::Location loc, mlir::Location locEnd,
- SourceRange exprRange, mlir::Value mainOp,
- mlir::acc::PrivateRecipeOp recipe,
- size_t numBounds,
- llvm::ArrayRef<QualType> boundTypes,
- const VarDecl *allocaDecl, QualType origType);
+ mlir::acc::ReductionRecipeOp recipe,
+ size_t numBounds);
+ void createInitRecipe(mlir::Location loc, mlir::Location locEnd,
+ SourceRange exprRange, mlir::Value mainOp,
+ mlir::Region &recipeInitRegion, size_t numBounds,
+ llvm::ArrayRef<QualType> boundTypes,
+ const VarDecl *allocaDecl, QualType origType);
void createRecipeDestroySection(mlir::Location loc, mlir::Location locEnd,
mlir::Value mainOp, CharUnits alignment,
@@ -224,10 +224,10 @@ class OpenACCRecipeBuilder : OpenACCRecipeBuilderBase {
// TODO: OpenACC: This is a bit of a hackery to get this to not change for
// the non-private recipes. This will be removed soon, when we get this
// 'right' for firstprivate and reduction.
- if constexpr (!std::is_same_v<RecipeTy, mlir::acc::PrivateRecipeOp>) {
+ if constexpr (std::is_same_v<RecipeTy, mlir::acc::FirstprivateRecipeOp>) {
if (numBounds) {
cgf.cgm.errorNYI(varRef->getSourceRange(),
- "firstprivate/reduction-init with bounds");
+ "firstprivate-init with bounds");
}
boundTypes = {};
numBounds = 0;
@@ -260,18 +260,25 @@ class OpenACCRecipeBuilder : OpenACCRecipeBuilderBase {
insertLocation = modBuilder.saveInsertionPoint();
if constexpr (std::is_same_v<RecipeTy, mlir::acc::PrivateRecipeOp>) {
- createPrivateInitRecipe(loc, locEnd, varRef->getSourceRange(), mainOp,
- recipe, numBounds, boundTypes, varRecipe,
- origType);
+ createInitRecipe(loc, locEnd, varRef->getSourceRange(), mainOp,
+ recipe.getInitRegion(), numBounds, boundTypes, varRecipe,
+ origType);
+ } else if constexpr (std::is_same_v<RecipeTy,
+ mlir::acc::ReductionRecipeOp>) {
+ createInitRecipe(loc, locEnd, varRef->getSourceRange(), mainOp,
+ recipe.getInitRegion(), numBounds, boundTypes, varRecipe,
+ origType);
+ createReductionRecipeCombiner(loc, locEnd, mainOp, recipe, numBounds);
} else {
+ static_assert(std::is_same_v<RecipeTy, mlir::acc::FirstprivateRecipeOp>);
+ // TODO: OpenACC: we probably want this to call createInitRecipe as well,
+ // but do so in a way that omits the 'initialization', so that we can do
+ // it separately, since it belongs in the 'copy' region. It also might
+ // need a way of getting the tempDeclEmission out of it for that purpose.
createRecipeInitCopy(loc, locEnd, varRef->getSourceRange(), mainOp,
recipe, varRecipe, temporary);
}
- if constexpr (std::is_same_v<RecipeTy, mlir::acc::ReductionRecipeOp>) {
- createReductionRecipeCombiner(loc, locEnd, mainOp, recipe);
- }
-
if (origType.isDestructedType())
createRecipeDestroySection(
loc, locEnd, mainOp, cgf.getContext().getDeclAlign(varRecipe),
diff --git a/clang/lib/Sema/SemaOpenACC.cpp b/clang/lib/Sema/SemaOpenACC.cpp
index 9aaf7f463403d..43d8eb4163a27 100644
--- a/clang/lib/Sema/SemaOpenACC.cpp
+++ b/clang/lib/Sema/SemaOpenACC.cpp
@@ -2894,15 +2894,19 @@ SemaOpenACC::CreateFirstPrivateInitRecipe(const Expr *VarExpr) {
OpenACCReductionRecipe SemaOpenACC::CreateReductionInitRecipe(
OpenACCReductionOperator ReductionOperator, const Expr *VarExpr) {
- // TODO: OpenACC: This shouldn't be necessary, see PrivateInitRecipe
- VarExpr = StripOffBounds(VarExpr);
-
+ // We don't strip bounds here, so that we are doing our recipe init at the
+ // 'lowest' possible level. Codegen is going to have to do its own 'looping'.
if (!VarExpr || VarExpr->getType()->isDependentType())
return OpenACCReductionRecipe::Empty();
QualType VarTy =
VarExpr->getType().getNonReferenceType().getUnqualifiedType();
+ // Array sections are special, and we have to treat them that way.
+ if (const auto *ASE =
+ dyn_cast<ArraySectionExpr>(VarExpr->IgnoreParenImpCasts()))
+ VarTy = ArraySectionExpr::getBaseOriginalType(ASE);
+
// TODO: OpenACC: for arrays/bounds versions, we're going to have to do a
// different initializer, but for now we can go ahead with this.
diff --git a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-default-ops.cpp b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-default-ops.cpp
index 3d295d58d1026..36d8c5ed4d7c5 100644
--- a/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-default-ops.cpp
+++ b/clang/test/CIR/CodeGenOpenACC/combined-reduction-clause-default-ops.cpp
@@ -1,4 +1,4 @@
-// RUN: not %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenacc -triple x86_64-linux-gnu -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir -triple x86_64-linux-pc %s -o - | FileCheck %s
struct DefaultOperators {
int i;
@@ -944,22 +944,436 @@ void acc_combined() {
for(int i=0;i < 5; ++i);
#pragma acc parallel loop reduction(+:someVarArr[2])
+// CHECK-NEXT: acc.reduction.recipe @reduction_add__Bcnt1__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <add> init {
+// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init"]
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_DefaultOperators>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.int<0> : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_U]] : !u32i, !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.float
+// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double>
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #cir.fp<0{{.*}}> : !cir.double
+// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double>
+// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[ZERO:.*]] = cir.const #false
+// CHECK-NEXT: cir.store {{.*}} %[[ZERO]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: acc.yield
+// CHECK-NEXT: } combiner {
+// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>
+// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
#pragma acc parallel loop reduction(*:someVarArr[2])
+// CHECK-NEXT: acc.reduction.recipe @reduction_mul__Bcnt1__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <mul> init {
+// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init"]
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_DefaultOperators>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[ONE]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.int<1> : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[ONE]], %[[GET_U]] : !u32i, !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.fp<1{{.*}}> : !cir.float
+// CHECK-NEXT: cir.store {{.*}} %[[ONE]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double>
+// CHECK-NEXT: %[[ONE:.*]] = cir.const #cir.fp<1{{.*}}> : !cir.double
+// CHECK-NEXT: cir.store {{.*}} %[[ONE]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double>
+// CHECK-NEXT: %[[GET_B:.*]] = cir.get_member %[[STRIDE]][4] {name = "b"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.bool>
+// CHECK-NEXT: %[[ONE:.*]] = cir.const #true
+// CHECK-NEXT: cir.store {{.*}} %[[ONE]], %[[GET_B]] : !cir.bool, !cir.ptr<!cir.bool>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: } step {
+// CHECK-NEXT: %[[ITR_LOAD]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[INC:.*]] = cir.unary(inc, %[[ITR_LOAD]]) : !u64i, !u64i
+// CHECK-NEXT: cir.store %[[INC]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.yield
+// CHECK-NEXT: }
+// CHECK-NEXT: }
+// CHECK-NEXT: acc.yield
+// CHECK-NEXT: } combiner {
+// CHECK-NEXT: ^bb0(%[[LHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[RHSARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> {{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: acc.yield %[[LHSARG]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>
+// CHECK-NEXT: }
for(int i=0;i < 5; ++i);
#pragma acc parallel loop reduction(max:someVarArr[2])
+// CHECK-NEXT: acc.reduction.recipe @reduction_max__Bcnt1__ZTSA5_16DefaultOperators : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> reduction_operator <max> init {
+// CHECK-NEXT: ^bb0(%[[ARG:.*]]: !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>{{.*}}, %[[BOUND1:.*]]: !acc.data_bounds_ty{{.*}}))
+// CHECK-NEXT: %[[ALLOCA:.*]] = cir.alloca !cir.array<!rec_DefaultOperators x 5>, !cir.ptr<!cir.array<!rec_DefaultOperators x 5>>, ["openacc.reduction.init"]
+// CHECK-NEXT: cir.scope {
+// CHECK-NEXT: %[[LB:.*]] = acc.get_lowerbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[LB_CAST:.*]] = builtin.unrealized_conversion_cast %[[LB]] : index to !u64i
+// CHECK-NEXT: %[[UB:.*]] = acc.get_upperbound %[[BOUND1]] : (!acc.data_bounds_ty) -> index
+// CHECK-NEXT: %[[UB_CAST:.*]] = builtin.unrealized_conversion_cast %[[UB]] : index to !u64i
+// CHECK-NEXT: %[[ITR:.*]] = cir.alloca !u64i, !cir.ptr<!u64i>, ["iter"] {alignment = 8 : i64}
+// CHECK-NEXT: cir.store %[[LB_CAST]], %[[ITR]] : !u64i, !cir.ptr<!u64i>
+// CHECK-NEXT: cir.for : cond {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[COND:.*]] = cir.cmp(lt, %[[ITR_LOAD]], %[[UB_CAST]]) : !u64i, !cir.bool
+// CHECK-NEXT: cir.condition(%[[COND]])
+// CHECK-NEXT: } body {
+// CHECK-NEXT: %[[ITR_LOAD:.*]] = cir.load %[[ITR]] : !cir.ptr<!u64i>, !u64i
+// CHECK-NEXT: %[[DECAY:.*]] = cir.cast array_to_ptrdecay %[[ALLOCA]] : !cir.ptr<!cir.array<!rec_DefaultOperators x 5>> -> !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[STRIDE:.*]] = cir.ptr_stride(%[[DECAY]] : !cir.ptr<!rec_DefaultOperators>, %[[ITR_LOAD]] : !u64i), !cir.ptr<!rec_DefaultOperators>
+// CHECK-NEXT: %[[GET_I:.*]] = cir.get_member %[[STRIDE]][0] {name = "i"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!s32i>
+// CHECK-NEXT: %[[LEAST:.*]] = cir.const #cir.int<-2147483648> : !s32i
+// CHECK-NEXT: cir.store {{.*}} %[[LEAST]], %[[GET_I]] : !s32i, !cir.ptr<!s32i>
+// CHECK-NEXT: %[[GET_U:.*]] = cir.get_member %[[STRIDE]][1] {name = "u"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!u32i>
+// CHECK-NEXT: %[[LEAST:.*]] = cir.const #cir.int<0> : !u32i
+// CHECK-NEXT: cir.store {{.*}} %[[LEAST]], %[[GET_U]] : !u32i, !cir.ptr<!u32i>
+// CHECK-NEXT: %[[GET_F:.*]] = cir.get_member %[[STRIDE]][2] {name = "f"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[LEAST:.*]] = cir.const #cir.fp<-3.4{{.*}}E+38> : !cir.float
+// CHECK-NEXT: cir.store {{.*}} %[[LEAST]], %[[GET_F]] : !cir.float, !cir.ptr<!cir.float>
+// CHECK-NEXT: %[[GET_D:.*]] = cir.get_member %[[STRIDE]][3] {name = "d"} : !cir.ptr<!rec_DefaultOperators> -> !cir.ptr<!cir.double>
+// CHECK-NEXT: %[[LEAST:.*]] = cir.const #cir.fp<-1.7{{.*}}E+308> : !cir.double
+// CHECK-NEXT: cir.store {{.*}} %[[LEAST]], %[[GET_D]] : !cir.double, !cir.ptr<!cir.double>
+// CHECK-NEXT: %[[GET_B:.*]] = cir.get_mem...
[truncated]
|
ba35dd9
to
92b5865
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
lgtm
…vm#161726) Just like with private, the lowering for these bounds are all pretty trivial. This patch enables them for reduction, which has everything in common except the init pattern, but that is handled/managed by Sema. This also adds sufficient testing to spot-check the allocation/initialization/destruction/etc.
Just like with private, the lowering for these bounds are all pretty trivial. This patch enables them for reduction, which has everything in common except the init pattern, but that is handled/managed by Sema.
This also adds sufficient testing to spot-check the allocation/initialization/destruction/etc.