-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[flang][openacc] Lower DO CONCURRENT with acc loop #79223
Conversation
Lower basic DO CONCURRENT with acc loop construct. The DO CONCURRENT is lowered to an acc.loop operation. This does not currently cover the DO CONCURRENT with locality specs.
@llvm/pr-subscribers-openacc @llvm/pr-subscribers-flang-fir-hlfir Author: Valentin Clement (バレンタイン クレメン) (clementval) ChangesLower basic DO CONCURRENT with acc loop construct. The DO CONCURRENT is lowered to an acc.loop operation. This does not currently cover the DO CONCURRENT with locality specs. Full diff: https://github.com/llvm/llvm-project/pull/79223.diff 2 Files Affected:
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp
index d619d47fc23597..4f06436d38739c 100644
--- a/flang/lib/Lower/OpenACC.cpp
+++ b/flang/lib/Lower/OpenACC.cpp
@@ -1607,6 +1607,48 @@ mlir::Type getTypeFromIvTypeSize(fir::FirOpBuilder &builder,
return builder.getIntegerType(ivTypeSize * 8);
}
+static void privatizeIv(Fortran::lower::AbstractConverter &converter,
+ const Fortran::semantics::Symbol &sym,
+ mlir::Location loc,
+ llvm::SmallVector<mlir::Type> &ivTypes,
+ llvm::SmallVector<mlir::Location> &ivLocs,
+ llvm::SmallVector<mlir::Value> &privateOperands,
+ llvm::SmallVector<mlir::Value> &ivPrivate,
+ llvm::SmallVector<mlir::Attribute> &privatizations,
+ bool isDoConcurrent = false) {
+ fir::FirOpBuilder &builder = converter.getFirOpBuilder();
+
+ mlir::Type ivTy = getTypeFromIvTypeSize(builder, sym);
+ ivTypes.push_back(ivTy);
+ ivLocs.push_back(loc);
+ mlir::Value ivValue = converter.getSymbolAddress(sym);
+ if (!ivValue && isDoConcurrent) {
+ // DO CONCURRENT induction variables are not mapped yet since they are local
+ // to the DO CONCURRENT scope.
+ mlir::OpBuilder::InsertPoint insPt = builder.saveInsertionPoint();
+ builder.setInsertionPointToStart(builder.getAllocaBlock());
+ ivValue = builder.createTemporaryAlloc(loc, ivTy, toStringRef(sym.name()));
+ builder.restoreInsertionPoint(insPt);
+ }
+
+ std::string recipeName =
+ fir::getTypeAsString(ivValue.getType(), converter.getKindMap(),
+ Fortran::lower::privatizationRecipePrefix);
+ auto recipe = Fortran::lower::createOrGetPrivateRecipe(
+ builder, recipeName, loc, ivValue.getType());
+
+ std::stringstream asFortran;
+ auto op = createDataEntryOp<mlir::acc::PrivateOp>(
+ builder, loc, ivValue, asFortran, {}, true, /*implicit=*/true,
+ mlir::acc::DataClause::acc_private, ivValue.getType());
+
+ privateOperands.push_back(op.getAccPtr());
+ ivPrivate.push_back(op.getAccPtr());
+ privatizations.push_back(mlir::SymbolRefAttr::get(builder.getContext(),
+ recipe.getSymName().str()));
+ converter.bindSymbol(sym, op.getAccPtr());
+}
+
static mlir::acc::LoopOp
createLoopOp(Fortran::lower::AbstractConverter &converter,
mlir::Location currentLocation,
@@ -1640,68 +1682,86 @@ createLoopOp(Fortran::lower::AbstractConverter &converter,
llvm::SmallVector<mlir::Location> ivLocs;
llvm::SmallVector<bool> inclusiveBounds;
- if (outerDoConstruct.IsDoConcurrent())
- TODO(currentLocation, "OpenACC loop with DO CONCURRENT");
-
llvm::SmallVector<mlir::Location> locs;
locs.push_back(currentLocation); // Location of the directive
-
- int64_t collapseValue = Fortran::lower::getCollapseValue(accClauseList);
Fortran::lower::pft::Evaluation *crtEval = &eval.getFirstNestedEvaluation();
- for (unsigned i = 0; i < collapseValue; ++i) {
- const Fortran::parser::LoopControl *loopControl;
- if (i == 0) {
- loopControl = &*outerDoConstruct.GetLoopControl();
+ bool isDoConcurrent = outerDoConstruct.IsDoConcurrent();
+ if (isDoConcurrent) {
+ const Fortran::parser::LoopControl *loopControl =
+ &*outerDoConstruct.GetLoopControl();
+ const auto &concurrent =
+ std::get<Fortran::parser::LoopControl::Concurrent>(loopControl->u);
+ if (!std::get<std::list<Fortran::parser::LocalitySpec>>(concurrent.t)
+ .empty())
+ TODO(currentLocation, "DO CONCURRENT with locality spec");
+
+ const auto &concurrentHeader =
+ std::get<Fortran::parser::ConcurrentHeader>(concurrent.t);
+ const auto &controls =
+ std::get<std::list<Fortran::parser::ConcurrentControl>>(
+ concurrentHeader.t);
+ for (const auto &control : controls) {
locs.push_back(converter.genLocation(
Fortran::parser::FindSourceLocation(outerDoConstruct)));
- } else {
- auto *doCons = crtEval->getIf<Fortran::parser::DoConstruct>();
- assert(doCons && "expect do construct");
- loopControl = &*doCons->GetLoopControl();
- locs.push_back(
- converter.genLocation(Fortran::parser::FindSourceLocation(*doCons)));
+ lowerbounds.push_back(fir::getBase(converter.genExprValue(
+ *Fortran::semantics::GetExpr(std::get<1>(control.t)), stmtCtx)));
+ upperbounds.push_back(fir::getBase(converter.genExprValue(
+ *Fortran::semantics::GetExpr(std::get<2>(control.t)), stmtCtx)));
+ if (const auto &expr =
+ std::get<std::optional<Fortran::parser::ScalarIntExpr>>(
+ control.t))
+ steps.push_back(fir::getBase(converter.genExprValue(
+ *Fortran::semantics::GetExpr(*expr), stmtCtx)));
+ else // If `step` is not present, assume it is `1`.
+ steps.push_back(builder.createIntegerConstant(
+ currentLocation, upperbounds[upperbounds.size() - 1].getType(), 1));
+
+ const auto &name = std::get<Fortran::parser::Name>(control.t);
+ privatizeIv(converter, *name.symbol, currentLocation, ivTypes, ivLocs,
+ privateOperands, ivPrivate, privatizations, isDoConcurrent);
+
+ inclusiveBounds.push_back(true);
}
+ } else {
+ int64_t collapseValue = Fortran::lower::getCollapseValue(accClauseList);
+ for (unsigned i = 0; i < collapseValue; ++i) {
+ const Fortran::parser::LoopControl *loopControl;
+ if (i == 0) {
+ loopControl = &*outerDoConstruct.GetLoopControl();
+ locs.push_back(converter.genLocation(
+ Fortran::parser::FindSourceLocation(outerDoConstruct)));
+ } else {
+ auto *doCons = crtEval->getIf<Fortran::parser::DoConstruct>();
+ assert(doCons && "expect do construct");
+ loopControl = &*doCons->GetLoopControl();
+ locs.push_back(converter.genLocation(
+ Fortran::parser::FindSourceLocation(*doCons)));
+ }
- const Fortran::parser::LoopControl::Bounds *bounds =
- std::get_if<Fortran::parser::LoopControl::Bounds>(&loopControl->u);
- assert(bounds && "Expected bounds on the loop construct");
- lowerbounds.push_back(fir::getBase(converter.genExprValue(
- *Fortran::semantics::GetExpr(bounds->lower), stmtCtx)));
- upperbounds.push_back(fir::getBase(converter.genExprValue(
- *Fortran::semantics::GetExpr(bounds->upper), stmtCtx)));
- if (bounds->step)
- steps.push_back(fir::getBase(converter.genExprValue(
- *Fortran::semantics::GetExpr(bounds->step), stmtCtx)));
- else // If `step` is not present, assume it as `1`.
- steps.push_back(builder.createIntegerConstant(
- currentLocation, upperbounds[upperbounds.size() - 1].getType(), 1));
-
- Fortran::semantics::Symbol &ivSym =
- bounds->name.thing.symbol->GetUltimate();
-
- mlir::Type ivTy = getTypeFromIvTypeSize(builder, ivSym);
- mlir::Value ivValue = converter.getSymbolAddress(ivSym);
- ivTypes.push_back(ivTy);
- ivLocs.push_back(currentLocation);
- std::string recipeName =
- fir::getTypeAsString(ivValue.getType(), converter.getKindMap(),
- Fortran::lower::privatizationRecipePrefix);
- auto recipe = Fortran::lower::createOrGetPrivateRecipe(
- builder, recipeName, currentLocation, ivValue.getType());
- std::stringstream asFortran;
- auto op = createDataEntryOp<mlir::acc::PrivateOp>(
- builder, currentLocation, ivValue, asFortran, {}, true,
- /*implicit=*/true, mlir::acc::DataClause::acc_private,
- ivValue.getType());
-
- privateOperands.push_back(op.getAccPtr());
- ivPrivate.push_back(op.getAccPtr());
- privatizations.push_back(mlir::SymbolRefAttr::get(
- builder.getContext(), recipe.getSymName().str()));
- inclusiveBounds.push_back(true);
- converter.bindSymbol(ivSym, op.getAccPtr());
- if (i < collapseValue - 1)
- crtEval = &*std::next(crtEval->getNestedEvaluations().begin());
+ const Fortran::parser::LoopControl::Bounds *bounds =
+ std::get_if<Fortran::parser::LoopControl::Bounds>(&loopControl->u);
+ assert(bounds && "Expected bounds on the loop construct");
+ lowerbounds.push_back(fir::getBase(converter.genExprValue(
+ *Fortran::semantics::GetExpr(bounds->lower), stmtCtx)));
+ upperbounds.push_back(fir::getBase(converter.genExprValue(
+ *Fortran::semantics::GetExpr(bounds->upper), stmtCtx)));
+ if (bounds->step)
+ steps.push_back(fir::getBase(converter.genExprValue(
+ *Fortran::semantics::GetExpr(bounds->step), stmtCtx)));
+ else // If `step` is not present, assume it is `1`.
+ steps.push_back(builder.createIntegerConstant(
+ currentLocation, upperbounds[upperbounds.size() - 1].getType(), 1));
+
+ Fortran::semantics::Symbol &ivSym =
+ bounds->name.thing.symbol->GetUltimate();
+ privatizeIv(converter, ivSym, currentLocation, ivTypes, ivLocs,
+ privateOperands, ivPrivate, privatizations);
+
+ inclusiveBounds.push_back(true);
+
+ if (i < collapseValue - 1)
+ crtEval = &*std::next(crtEval->getNestedEvaluations().begin());
+ }
}
for (const Fortran::parser::AccClause &clause : accClauseList.v) {
diff --git a/flang/test/Lower/OpenACC/acc-loop.f90 b/flang/test/Lower/OpenACC/acc-loop.f90
index 6bd4dd61bd893b..28da9f99282ced 100644
--- a/flang/test/Lower/OpenACC/acc-loop.f90
+++ b/flang/test/Lower/OpenACC/acc-loop.f90
@@ -306,3 +306,23 @@ program acc_loop
! CHECK: acc.loop gang([#acc.device_type<nvidia>, #acc.device_type<default>])
end program
+
+subroutine sub1(i, j, k)
+ integer :: i,j,k
+ integer :: a(i,j,k)
+ !$acc parallel loop
+ do concurrent (i=1:10,j=1:100,k=1:200)
+ a(i,j,k) = a(i,j,k) + 1
+ end do
+end subroutine
+
+! CHECK: func.func @_QPsub1
+! CHECK: %[[DC_K:.*]] = fir.alloca i32 {bindc_name = "k"}
+! CHECK: %[[DC_J:.*]] = fir.alloca i32 {bindc_name = "j"}
+! CHECK: %[[DC_I:.*]] = fir.alloca i32 {bindc_name = "i"}
+! CHECK: acc.parallel
+! CHECK: %[[P_I:.*]] = acc.private varPtr(%[[DC_I]] : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = ""}
+! CHECK: %[[P_J:.*]] = acc.private varPtr(%[[DC_J]] : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = ""}
+! CHECK: %[[P_K:.*]] = acc.private varPtr(%[[DC_K]] : !fir.ref<i32>) -> !fir.ref<i32> {implicit = true, name = ""}
+! CHECK: acc.loop private(@privatization_ref_i32 -> %[[P_I]] : !fir.ref<i32>, @privatization_ref_i32 -> %[[P_J]] : !fir.ref<i32>, @privatization_ref_i32 -> %[[P_K]] : !fir.ref<i32>) (%{{.*}} : i32, %{{.*}} : i32, %{{.*}} : i32) = (%c1{{.*}}, %c1{{.*}}, %c1{{.*}} : i32, i32, i32) to (%c10{{.*}}, %c100{{.*}}, %c200{{.*}} : i32, i32, i32) step (%c1{{.*}}, %c1{{.*}}, %c1{{.*}} : i32, i32, i32)
+! CHECK: } attributes {inclusiveUpperbound = array<i1: true, true, true>}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is great.
I think the acc.loop should also be marked with appropriate attribute as part of this change:
3353 2.17.2 Do Concurrent Construct
3354 This section refers to the Fortran do concurrent construct that is a form of do construct. When
3355 do concurrent appears without a loop construct in a kernels construct it is treated as if it is
3356 annotated with loop auto. If it appears in a parallel construct or an accelerator routine then
3357 it is treated as if it is annotated with loop independent.
I think this does not apply for DO CONCURRENT with acc loop construct.
So it does not apply for the lowering for acc loop construct but we should handle it when DO CONCURRENT does not have a loop construct annotation. |
You're right - I totally skimmed over that. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Excellent!
✅ With the latest revision this PR passed the C/C++ code formatter. |
Lower basic DO CONCURRENT with acc loop construct. The DO CONCURRENT is lowered to an acc.loop operation.
This does not currently cover the DO CONCURRENT with locality specs.