-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[flang][openacc] Add loop expand pass #74045
Conversation
@llvm/pr-subscribers-flang-fir-hlfir @llvm/pr-subscribers-mlir Author: Valentin Clement (バレンタイン クレメン) (clementval) Changes
Full diff: https://github.com/llvm/llvm-project/pull/74045.diff 6 Files Affected:
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.h b/flang/include/flang/Optimizer/Transforms/Passes.h
index 92bc7246eca7005..6320690a785a85e 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.h
+++ b/flang/include/flang/Optimizer/Transforms/Passes.h
@@ -45,6 +45,7 @@ namespace fir {
#define GEN_PASS_DECL_ALGEBRAICSIMPLIFICATION
#define GEN_PASS_DECL_POLYMORPHICOPCONVERSION
#define GEN_PASS_DECL_OPENACCDATAOPERANDCONVERSION
+#define GEN_PASS_DECL_OPENACCLOOPEXPAND
#include "flang/Optimizer/Transforms/Passes.h.inc"
std::unique_ptr<mlir::Pass> createAbstractResultOnFuncOptPass();
@@ -79,6 +80,8 @@ std::unique_ptr<mlir::Pass> createOMPFunctionFilteringPass();
std::unique_ptr<mlir::OperationPass<mlir::ModuleOp>>
createOMPMarkDeclareTargetPass();
+std::unique_ptr<mlir::Pass> createOpenACCLoopExpandPass();
+
std::unique_ptr<mlir::Pass> createVScaleAttrPass();
std::unique_ptr<mlir::Pass>
createVScaleAttrPass(std::pair<unsigned, unsigned> vscaleAttr);
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td
index c3768fd2d689c1a..c9f707ba084cb02 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.td
+++ b/flang/include/flang/Optimizer/Transforms/Passes.td
@@ -335,6 +335,14 @@ def OMPFunctionFiltering : Pass<"omp-function-filtering"> {
];
}
+def OpenACCLoopExpand : Pass<"acc-loop-expand", "mlir::func::FuncOp"> {
+  let summary = "Expand acc.loop induction ranges into a fir.do_loop nest";
+  let description = [{
+    Transform an acc.loop operation with induction ranges into an acc.loop
+    operation that wraps an explicit fir.do_loop nest.
+  }];
+  let constructor = "::fir::createOpenACCLoopExpandPass()";
+  let dependentDialects = [
+    "fir::FIROpsDialect"
+  ];
+}
+
def VScaleAttr : Pass<"vscale-attr", "mlir::func::FuncOp"> {
let summary = "Add vscale_range attribute to functions";
let description = [{
diff --git a/flang/lib/Optimizer/Transforms/CMakeLists.txt b/flang/lib/Optimizer/Transforms/CMakeLists.txt
index 03b67104a93b575..03303ee14d91790 100644
--- a/flang/lib/Optimizer/Transforms/CMakeLists.txt
+++ b/flang/lib/Optimizer/Transforms/CMakeLists.txt
@@ -19,6 +19,7 @@ add_flang_library(FIRTransforms
LoopVersioning.cpp
OMPFunctionFiltering.cpp
OMPMarkDeclareTarget.cpp
+ OpenACCLoopExpand.cpp
VScaleAttr.cpp
DEPENDS
diff --git a/flang/lib/Optimizer/Transforms/OpenACCLoopExpand.cpp b/flang/lib/Optimizer/Transforms/OpenACCLoopExpand.cpp
new file mode 100644
index 000000000000000..3d712c9ea8d7a41
--- /dev/null
+++ b/flang/lib/Optimizer/Transforms/OpenACCLoopExpand.cpp
@@ -0,0 +1,170 @@
+//===- OpenACCLoopExpand.cpp - expand acc.loop operand to fir.do_loop nest ===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Builder/FIRBuilder.h"
+#include "flang/Optimizer/Dialect/FIRDialect.h"
+#include "flang/Optimizer/Dialect/FIROps.h"
+#include "flang/Optimizer/Dialect/Support/FIRContext.h"
+#include "flang/Optimizer/Dialect/Support/KindMapping.h"
+#include "flang/Optimizer/Transforms/Passes.h"
+#include "mlir/Dialect/OpenACC/OpenACC.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Transforms/DialectConversion.h"
+
+namespace fir {
+#define GEN_PASS_DEF_OPENACCLOOPEXPAND
+#include "flang/Optimizer/Transforms/Passes.h.inc"
+} // namespace fir
+
+namespace {
+/// Function pass that expands `acc.loop` operations carrying induction ranges
+/// into `acc.loop` operations wrapping an explicit `fir.do_loop` nest.
+///
+/// The class is only used in this translation unit, so it lives in an
+/// anonymous namespace to avoid clashing with any other `LoopExpand`.
+class LoopExpand : public fir::impl::OpenACCLoopExpandBase<LoopExpand> {
+public:
+  void runOnOperation() override;
+};
+} // namespace
+
+/// Return the original variable behind the private operand of \p op that is
+/// equal to \p value, i.e. the `varPtr` of the `acc.private` operation that
+/// defines this operand.
+///
+/// A null value is returned when \p value is null, when it is not one of the
+/// private operands of \p op, or when the matching operand is not produced by
+/// an `acc.private` operation.
+static mlir::Value retrievePrivatizedIv(mlir::acc::LoopOp &op,
+                                        mlir::Value value) {
+  if (!value)
+    return mlir::Value{};
+  for (mlir::Value operand : op.getPrivateOperands()) {
+    if (operand != value)
+      continue;
+    // getDefiningOp<T>() yields a null op for block arguments and for any
+    // other producer, where an unconditional cast would assert instead.
+    if (auto privateOp = operand.getDefiningOp<mlir::acc::PrivateOp>())
+      return privateOp.getVarPtr();
+    return mlir::Value{};
+  }
+  return mlir::Value{};
+}
+
+/// Reset operands and operand segments for the induction ranges: the
+/// lower-bound, upper-bound and step operands of \p accLoopOp are removed.
+///
+/// The builder parameter is not used; it is left unnamed so that the existing
+/// call sites keep working without triggering an unused-parameter warning.
+/// NOTE(review): despite the name, no attribute is cleared here.
+static void clearInductionRangesAndAttrs(fir::FirOpBuilder & /*builder*/,
+                                         mlir::acc::LoopOp &accLoopOp) {
+  // Remove the ranges.
+  accLoopOp.getLowerboundMutable().clear();
+  accLoopOp.getUpperboundMutable().clear();
+  accLoopOp.getStepMutable().clear();
+}
+
+/// Collect, for every block argument of the `acc.loop` body, the original
+/// (non-privatized) induction variable.
+///
+/// Each argument is expected to be stored by a `fir.store` into a privatized
+/// variable that is one of the private operands of \p accLoopOp. The store is
+/// erased as a side effect, and the variable behind the matching `acc.private`
+/// operation is appended to the result, in block-argument order.
+static llvm::SmallVector<mlir::Value>
+getOriginalInductionVars(mlir::acc::LoopOp &accLoopOp) {
+  llvm::SmallVector<mlir::Value> ivs;
+  for (auto arg : accLoopOp.getBody().getArguments()) {
+    // Gather the stores first: erasing an operation while walking the use
+    // list of one of its operands invalidates the use iterator.
+    llvm::SmallVector<fir::StoreOp> ivStores;
+    for (mlir::OpOperand &use : arg.getUses())
+      if (auto storeOp = mlir::dyn_cast<fir::StoreOp>(use.getOwner()))
+        ivStores.push_back(storeOp);
+
+    mlir::Value privateValue;
+    for (fir::StoreOp storeOp : ivStores) {
+      privateValue = storeOp.getMemref();
+      storeOp->erase();
+    }
+
+    mlir::Value originalIv = retrievePrivatizedIv(accLoopOp, privateValue);
+    assert(originalIv && "Expect induction variable to be found");
+    ivs.push_back(originalIv);
+  }
+  return ivs;
+}
+
+/// Expand every `acc.loop` of the function that still carries induction
+/// ranges.
+///
+/// For a structured loop (its region has a single block) the body is moved
+/// into a newly built `fir.do_loop` nest, one loop per block argument, and the
+/// nest is placed back inside the `acc.loop` region followed by an
+/// `acc.yield`. For an unstructured loop only the block arguments, the stores
+/// of these arguments and the induction ranges are removed.
+void LoopExpand::runOnOperation() {
+  mlir::func::FuncOp func = getOperation();
+
+  mlir::ModuleOp mod = func->getParentOfType<mlir::ModuleOp>();
+  fir::KindMapping kindMap = fir::getKindMapping(mod);
+  fir::FirOpBuilder builder{mod, std::move(kindMap)};
+
+  func.walk([&](mlir::acc::LoopOp accLoopOp) {
+    mlir::Location loc = accLoopOp.getLoc();
+    mlir::Type idxTy = builder.getIndexType();
+
+    bool isStructured = accLoopOp.getLoopRegions().front()->hasOneBlock();
+    bool finalCountValue = isStructured;
+    unsigned nbLoop = accLoopOp.getBody().getNumArguments();
+
+    // An acc.loop without block arguments has no induction range to expand
+    // (e.g. it has already been expanded). Without this guard the code below
+    // would index into an empty `loops` vector.
+    if (nbLoop == 0)
+      return;
+
+    // Gather original (non-privatized) induction variables.
+    llvm::SmallVector<mlir::Value> ivs = getOriginalInductionVars(accLoopOp);
+
+    // Remove block arguments in order to create loop-nest and move current
+    // body in the newly created loop nest.
+    accLoopOp.getBody().eraseArguments(0, nbLoop);
+    builder.setInsertionPointAfter(accLoopOp);
+
+    if (!isStructured) {
+      clearInductionRangesAndAttrs(builder, accLoopOp);
+      return;
+    }
+
+    llvm::SmallVector<mlir::Value> lbs, ubs, steps;
+    llvm::SmallVector<fir::DoLoopOp> loops;
+
+    // Create the loop nest, move the acc.loop body inside and move the loop
+    // nest inside the acc.loop again.
+    for (unsigned i = 0; i < nbLoop; ++i) {
+      bool isInnerLoop = i == (nbLoop - 1);
+
+      lbs.push_back(
+          builder.createConvert(loc, idxTy, accLoopOp.getLowerbound()[i]));
+      ubs.push_back(
+          builder.createConvert(loc, idxTy, accLoopOp.getUpperbound()[i]));
+      steps.push_back(
+          builder.createConvert(loc, idxTy, accLoopOp.getStep()[i]));
+      // The loop carries the induction variable value, in its original type,
+      // as an iteration argument initialized with the lower bound.
+      fir::DoLoopOp doLoopOp = builder.create<fir::DoLoopOp>(
+          loc, lbs[i], ubs[i], steps[i], /*unordered=*/false, finalCountValue,
+          mlir::ValueRange{accLoopOp.getLowerbound()[i]});
+      loops.push_back(doLoopOp);
+
+      if (isInnerLoop) {
+        // Move acc.loop body inside the newly created fir.do_loop.
+        accLoopOp.getBody().getTerminator()->erase();
+        doLoopOp.getRegion().takeBody(*accLoopOp.getLoopRegions().front());
+        // Recreate the block arguments: the block taken from the acc.loop had
+        // its arguments erased above.
+        doLoopOp.getBody()->addArgument(builder.getIndexType(), loc);
+        doLoopOp.getBody()->addArgument(accLoopOp.getLowerbound()[i].getType(),
+                                        loc);
+      } else {
+        // The next loop of the nest is created inside this one.
+        builder.setInsertionPointToStart(doLoopOp.getBody());
+      }
+    }
+
+    // Move the loop nest inside the acc.loop region.
+    mlir::Block *newAccLoopBlock =
+        builder.createBlock(accLoopOp.getLoopRegions().front());
+    loops[0].getOperation()->moveBefore(newAccLoopBlock,
+                                        newAccLoopBlock->end());
+
+    for (unsigned i = 0; i < nbLoop; ++i) {
+      // Store the carried value into the original induction variable at the
+      // top of each loop body.
+      builder.setInsertionPointToStart(loops[i].getBody());
+      builder.create<fir::StoreOp>(loc, loops[i].getBody()->getArgument(1),
+                                   ivs[i]);
+
+      builder.setInsertionPointToEnd(loops[i].getBody());
+      llvm::SmallVector<mlir::Value, 2> results;
+      if (finalCountValue)
+        results.push_back(builder.create<mlir::arith::AddIOp>(
+            loc, loops[i].getInductionVar(), loops[i].getStep()));
+
+      // Step loopVariable to help optimizations such as vectorization.
+      // Induction variable elimination will clean up as necessary.
+      mlir::Value convStep = builder.create<fir::ConvertOp>(
+          loc, accLoopOp.getStep()[i].getType(), loops[i].getStep());
+      mlir::Value loopVar = builder.create<fir::LoadOp>(loc, ivs[i]);
+      results.push_back(
+          builder.create<mlir::arith::AddIOp>(loc, loopVar, convStep));
+      builder.create<fir::ResultOp>(loc, results);
+
+      // Convert ops have been created outside of the acc.loop operation. They
+      // need to be moved back before their uses.
+      lbs[i].getDefiningOp()->moveBefore(loops[i].getOperation());
+      ubs[i].getDefiningOp()->moveBefore(loops[i].getOperation());
+      steps[i].getDefiningOp()->moveBefore(loops[i].getOperation());
+    }
+
+    builder.setInsertionPointToEnd(newAccLoopBlock);
+    builder.create<mlir::acc::YieldOp>(loc);
+
+    clearInductionRangesAndAttrs(builder, accLoopOp);
+  });
+}
+
+/// Factory for the pass that expands `acc.loop` induction ranges into a
+/// `fir.do_loop` nest.
+std::unique_ptr<mlir::Pass> fir::createOpenACCLoopExpandPass() {
+  return std::make_unique<LoopExpand>();
+}
diff --git a/flang/test/Fir/OpenACC/loop-expand.f90 b/flang/test/Fir/OpenACC/loop-expand.f90
new file mode 100644
index 000000000000000..2efb2b2bd753355
--- /dev/null
+++ b/flang/test/Fir/OpenACC/loop-expand.f90
@@ -0,0 +1,118 @@
+! RUN: bbc -fopenacc -emit-hlfir %s -o - | fir-opt --split-input-file --acc-loop-expand | FileCheck %s
+
+! A single loop is expanded into one fir.do_loop inside the acc.loop region.
+subroutine singleloop(a)
+  real :: a(:)
+  integer :: i
+  a = 0.0
+
+  !$acc loop
+  do i = 1, 10
+    a(i) = i
+  end do
+end subroutine
+! CHECK-LABEL: func.func @_QPsingleloop
+! CHECK: %[[I:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFsingleloopEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: acc.loop private
+! CHECK: %[[LB0:.*]] = fir.convert %c1_i32 : (i32) -> index
+! CHECK: %[[UB0:.*]] = fir.convert %c10_i32 : (i32) -> index
+! CHECK: %[[STEP0:.*]] = fir.convert %c1_i32_0 : (i32) -> index
+! CHECK: %{{.*}} = fir.do_loop %[[ARG1:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] iter_args(%[[ARG2:.*]] = %{{.*}}) -> (index, i32) {
+! CHECK: fir.store %[[ARG2]] to %[[I]]#1 : !fir.ref<i32>
+! CHECK: %[[INCR1:.*]] = arith.addi %[[ARG1]], %[[STEP0]] : index
+! CHECK: %[[CONV_STEP:.*]] = fir.convert %[[STEP0]] : (index) -> i32
+! CHECK: %[[LOAD_I:.*]] = fir.load %[[I]]#1 : !fir.ref<i32>
+! CHECK: %[[INCR2:.*]] = arith.addi %[[LOAD_I]], %[[CONV_STEP]] : i32
+! CHECK: fir.result %[[INCR1]], %[[INCR2]] : index, i32
+! CHECK: }
+! CHECK: acc.yield
+! CHECK: }
+
+! Without a collapse clause only the outer loop is associated with the
+! acc.loop; the inner loop is only checked to be a fir.do_loop.
+subroutine single_loop_with_nest(a)
+  real :: a(:,:)
+  integer :: i, j
+  a = 0.0
+
+  !$acc loop
+  do i = 1, 10
+    do j = 1, 10
+      a(i, j) = i
+    end do
+  end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPsingle_loop_with_nest
+! CHECK: %[[I:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFsingle_loop_with_nestEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: acc.loop private
+! CHECK: %[[LB0:.*]] = fir.convert %c1_i32 : (i32) -> index
+! CHECK: %[[UB0:.*]] = fir.convert %c10_i32 : (i32) -> index
+! CHECK: %[[STEP0:.*]] = fir.convert %c1_i32_0 : (i32) -> index
+! CHECK: %{{.*}} = fir.do_loop %[[ARG1:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] iter_args(%[[ARG2:.*]] = %{{.*}}) -> (index, i32) {
+! CHECK: fir.store %[[ARG2]] to %[[I]]#1 : !fir.ref<i32>
+! CHECK: fir.do_loop
+! CHECK: }
+! CHECK: %[[INCR1:.*]] = arith.addi %[[ARG1]], %[[STEP0]] : index
+! CHECK: %[[CONV_STEP:.*]] = fir.convert %[[STEP0]] : (index) -> i32
+! CHECK: %[[LOAD_I:.*]] = fir.load %[[I]]#1 : !fir.ref<i32>
+! CHECK: %[[INCR2:.*]] = arith.addi %[[LOAD_I]], %[[CONV_STEP]] : i32
+! CHECK: fir.result %[[INCR1]], %[[INCR2]] : index, i32
+! CHECK: }
+! CHECK: acc.yield
+! CHECK: }
+
+! With collapse(2) both loops are associated with the acc.loop and are
+! expanded into a nest of two fir.do_loop operations.
+subroutine loop_with_nest(a)
+  real :: a(:,:)
+  integer :: i, j
+  a = 0.0
+
+  !$acc loop collapse(2)
+  do i = 1, 10
+    do j = 1, 10
+      a(i, j) = i
+    end do
+  end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPloop_with_nest
+! CHECK: %[[I:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFloop_with_nestEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[J:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFloop_with_nestEj"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: acc.loop private(@privatization_ref_i32 -> %{{.*}} : !fir.ref<i32>, @privatization_ref_i32 -> %{{.*}} : !fir.ref<i32>) {
+! CHECK: %[[LB0:.*]] = fir.convert %{{.*}} : (i32) -> index
+! CHECK: %[[UB0:.*]] = fir.convert %{{.*}} : (i32) -> index
+! CHECK: %[[STEP0:.*]] = fir.convert %{{.*}} : (i32) -> index
+! CHECK: %{{.*}}:2 = fir.do_loop %[[ARG1:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] iter_args(%[[ARG2:.*]] = %{{.*}}) -> (index, i32) {
+! CHECK: fir.store %[[ARG2]] to %[[I]]#1 : !fir.ref<i32>
+! CHECK: %[[LB1:.*]] = fir.convert %{{.*}} : (i32) -> index
+! CHECK: %[[UB1:.*]] = fir.convert %{{.*}} : (i32) -> index
+! CHECK: %[[STEP1:.*]] = fir.convert %{{.*}} : (i32) -> index
+! CHECK: %{{.*}}:2 = fir.do_loop %[[ARG3:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] iter_args(%[[ARG4:.*]] = %{{.*}}) -> (index, i32) {
+! CHECK: fir.store %[[ARG4]] to %[[J]]#1 : !fir.ref<i32>
+
+! CHECK: %[[INCR1:.*]] = arith.addi %[[ARG3]], %[[STEP1]] : index
+! CHECK: %[[CONV_STEP1:.*]] = fir.convert %[[STEP1]] : (index) -> i32
+! CHECK: %[[LOAD_J:.*]] = fir.load %[[J]]#1 : !fir.ref<i32>
+! CHECK: %[[INCR2:.*]] = arith.addi %[[LOAD_J]], %[[CONV_STEP1]] : i32
+! CHECK: fir.result %[[INCR1]], %[[INCR2]] : index, i32
+! CHECK: }
+! CHECK: %[[INCR1:.*]] = arith.addi %[[ARG1]], %[[STEP0]] : index
+! CHECK: %[[CONV_STEP0:.*]] = fir.convert %[[STEP0]] : (index) -> i32
+! CHECK: %[[LOAD_I:.*]] = fir.load %[[I]]#1 : !fir.ref<i32>
+! CHECK: %[[INCR2:.*]] = arith.addi %[[LOAD_I]], %[[CONV_STEP0]] : i32
+! CHECK: fir.result %[[INCR1]], %[[INCR2]] : index, i32
+! CHECK: }
+! CHECK: acc.yield
+! CHECK: }
+
+! The pass only builds a fir.do_loop nest when the acc.loop region has a
+! single block. This loop is presumably lowered to a multi-block region
+! because of the STOP statement (to be confirmed); the test only checks that
+! the acc.loop and its private clause are still present.
+subroutine loop_unstructured(a)
+ real :: a(:)
+ integer :: i
+ a = 0.0
+
+ !$acc loop
+ do i = 1, 10
+ if (a(i) > 0.0) stop 'stop'
+ a(i) = i
+ end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPloop_unstructured
+! CHECK: acc.loop private(@privatization_ref_i32 -> %{{.*}} : !fir.ref<i32>)
diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
index 391e77e0c4081a3..62ab100847619a2 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
@@ -1218,6 +1218,8 @@ def OpenACC_LoopOp : OpenACC_Op<"loop",
/// The i-th data operand passed.
Value getDataOperand(unsigned i);
+
+ Block &getBody() { return getLoopRegions().front()->front(); }
}];
let hasCustomAssemblyFormat = 1;
@@ -1237,7 +1239,7 @@ def OpenACC_LoopOp : OpenACC_Op<"loop",
)
$region
( `(` type($results)^ `)` )?
- attr-dict-with-keyword
+ attr-dict-with-keyword
}];
let hasVerifier = 1;
|
@llvm/pr-subscribers-openacc Author: Valentin Clement (バレンタイン クレメン) (clementval) Changes
Full diff: https://github.com/llvm/llvm-project/pull/74045.diff 6 Files Affected:
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.h b/flang/include/flang/Optimizer/Transforms/Passes.h
index 92bc7246eca7005..6320690a785a85e 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.h
+++ b/flang/include/flang/Optimizer/Transforms/Passes.h
@@ -45,6 +45,7 @@ namespace fir {
#define GEN_PASS_DECL_ALGEBRAICSIMPLIFICATION
#define GEN_PASS_DECL_POLYMORPHICOPCONVERSION
#define GEN_PASS_DECL_OPENACCDATAOPERANDCONVERSION
+#define GEN_PASS_DECL_OPENACCLOOPEXPAND
#include "flang/Optimizer/Transforms/Passes.h.inc"
std::unique_ptr<mlir::Pass> createAbstractResultOnFuncOptPass();
@@ -79,6 +80,8 @@ std::unique_ptr<mlir::Pass> createOMPFunctionFilteringPass();
std::unique_ptr<mlir::OperationPass<mlir::ModuleOp>>
createOMPMarkDeclareTargetPass();
+std::unique_ptr<mlir::Pass> createOpenACCLoopExpandPass();
+
std::unique_ptr<mlir::Pass> createVScaleAttrPass();
std::unique_ptr<mlir::Pass>
createVScaleAttrPass(std::pair<unsigned, unsigned> vscaleAttr);
diff --git a/flang/include/flang/Optimizer/Transforms/Passes.td b/flang/include/flang/Optimizer/Transforms/Passes.td
index c3768fd2d689c1a..c9f707ba084cb02 100644
--- a/flang/include/flang/Optimizer/Transforms/Passes.td
+++ b/flang/include/flang/Optimizer/Transforms/Passes.td
@@ -335,6 +335,14 @@ def OMPFunctionFiltering : Pass<"omp-function-filtering"> {
];
}
+def OpenACCLoopExpand : Pass<"acc-loop-expand", "mlir::func::FuncOp"> {
+  let summary = "Expand acc.loop induction ranges into a fir.do_loop nest";
+  let description = [{
+    Transform an acc.loop operation with induction ranges into an acc.loop
+    operation that wraps an explicit fir.do_loop nest.
+  }];
+  let constructor = "::fir::createOpenACCLoopExpandPass()";
+  let dependentDialects = [
+    "fir::FIROpsDialect"
+  ];
+}
+
def VScaleAttr : Pass<"vscale-attr", "mlir::func::FuncOp"> {
let summary = "Add vscale_range attribute to functions";
let description = [{
diff --git a/flang/lib/Optimizer/Transforms/CMakeLists.txt b/flang/lib/Optimizer/Transforms/CMakeLists.txt
index 03b67104a93b575..03303ee14d91790 100644
--- a/flang/lib/Optimizer/Transforms/CMakeLists.txt
+++ b/flang/lib/Optimizer/Transforms/CMakeLists.txt
@@ -19,6 +19,7 @@ add_flang_library(FIRTransforms
LoopVersioning.cpp
OMPFunctionFiltering.cpp
OMPMarkDeclareTarget.cpp
+ OpenACCLoopExpand.cpp
VScaleAttr.cpp
DEPENDS
diff --git a/flang/lib/Optimizer/Transforms/OpenACCLoopExpand.cpp b/flang/lib/Optimizer/Transforms/OpenACCLoopExpand.cpp
new file mode 100644
index 000000000000000..3d712c9ea8d7a41
--- /dev/null
+++ b/flang/lib/Optimizer/Transforms/OpenACCLoopExpand.cpp
@@ -0,0 +1,170 @@
+//===- OpenACCLoopExpand.cpp - expand acc.loop operand to fir.do_loop nest ===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/Builder/FIRBuilder.h"
+#include "flang/Optimizer/Dialect/FIRDialect.h"
+#include "flang/Optimizer/Dialect/FIROps.h"
+#include "flang/Optimizer/Dialect/Support/FIRContext.h"
+#include "flang/Optimizer/Dialect/Support/KindMapping.h"
+#include "flang/Optimizer/Transforms/Passes.h"
+#include "mlir/Dialect/OpenACC/OpenACC.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Transforms/DialectConversion.h"
+
+namespace fir {
+#define GEN_PASS_DEF_OPENACCLOOPEXPAND
+#include "flang/Optimizer/Transforms/Passes.h.inc"
+} // namespace fir
+
+namespace {
+/// Function pass that expands `acc.loop` operations carrying induction ranges
+/// into `acc.loop` operations wrapping an explicit `fir.do_loop` nest.
+///
+/// The class is only used in this translation unit, so it lives in an
+/// anonymous namespace to avoid clashing with any other `LoopExpand`.
+class LoopExpand : public fir::impl::OpenACCLoopExpandBase<LoopExpand> {
+public:
+  void runOnOperation() override;
+};
+} // namespace
+
+/// Return the original variable behind the private operand of \p op that is
+/// equal to \p value, i.e. the `varPtr` of the `acc.private` operation that
+/// defines this operand.
+///
+/// A null value is returned when \p value is null, when it is not one of the
+/// private operands of \p op, or when the matching operand is not produced by
+/// an `acc.private` operation.
+static mlir::Value retrievePrivatizedIv(mlir::acc::LoopOp &op,
+                                        mlir::Value value) {
+  if (!value)
+    return mlir::Value{};
+  for (mlir::Value operand : op.getPrivateOperands()) {
+    if (operand != value)
+      continue;
+    // getDefiningOp<T>() yields a null op for block arguments and for any
+    // other producer, where an unconditional cast would assert instead.
+    if (auto privateOp = operand.getDefiningOp<mlir::acc::PrivateOp>())
+      return privateOp.getVarPtr();
+    return mlir::Value{};
+  }
+  return mlir::Value{};
+}
+
+/// Reset operands and operand segments for the induction ranges: the
+/// lower-bound, upper-bound and step operands of \p accLoopOp are removed.
+///
+/// The builder parameter is not used; it is left unnamed so that the existing
+/// call sites keep working without triggering an unused-parameter warning.
+/// NOTE(review): despite the name, no attribute is cleared here.
+static void clearInductionRangesAndAttrs(fir::FirOpBuilder & /*builder*/,
+                                         mlir::acc::LoopOp &accLoopOp) {
+  // Remove the ranges.
+  accLoopOp.getLowerboundMutable().clear();
+  accLoopOp.getUpperboundMutable().clear();
+  accLoopOp.getStepMutable().clear();
+}
+
+/// Collect, for every block argument of the `acc.loop` body, the original
+/// (non-privatized) induction variable.
+///
+/// Each argument is expected to be stored by a `fir.store` into a privatized
+/// variable that is one of the private operands of \p accLoopOp. The store is
+/// erased as a side effect, and the variable behind the matching `acc.private`
+/// operation is appended to the result, in block-argument order.
+static llvm::SmallVector<mlir::Value>
+getOriginalInductionVars(mlir::acc::LoopOp &accLoopOp) {
+  llvm::SmallVector<mlir::Value> ivs;
+  for (auto arg : accLoopOp.getBody().getArguments()) {
+    // Gather the stores first: erasing an operation while walking the use
+    // list of one of its operands invalidates the use iterator.
+    llvm::SmallVector<fir::StoreOp> ivStores;
+    for (mlir::OpOperand &use : arg.getUses())
+      if (auto storeOp = mlir::dyn_cast<fir::StoreOp>(use.getOwner()))
+        ivStores.push_back(storeOp);
+
+    mlir::Value privateValue;
+    for (fir::StoreOp storeOp : ivStores) {
+      privateValue = storeOp.getMemref();
+      storeOp->erase();
+    }
+
+    mlir::Value originalIv = retrievePrivatizedIv(accLoopOp, privateValue);
+    assert(originalIv && "Expect induction variable to be found");
+    ivs.push_back(originalIv);
+  }
+  return ivs;
+}
+
+/// Expand every `acc.loop` of the function that still carries induction
+/// ranges.
+///
+/// For a structured loop (its region has a single block) the body is moved
+/// into a newly built `fir.do_loop` nest, one loop per block argument, and the
+/// nest is placed back inside the `acc.loop` region followed by an
+/// `acc.yield`. For an unstructured loop only the block arguments, the stores
+/// of these arguments and the induction ranges are removed.
+void LoopExpand::runOnOperation() {
+  mlir::func::FuncOp func = getOperation();
+
+  mlir::ModuleOp mod = func->getParentOfType<mlir::ModuleOp>();
+  fir::KindMapping kindMap = fir::getKindMapping(mod);
+  fir::FirOpBuilder builder{mod, std::move(kindMap)};
+
+  func.walk([&](mlir::acc::LoopOp accLoopOp) {
+    mlir::Location loc = accLoopOp.getLoc();
+    mlir::Type idxTy = builder.getIndexType();
+
+    bool isStructured = accLoopOp.getLoopRegions().front()->hasOneBlock();
+    bool finalCountValue = isStructured;
+    unsigned nbLoop = accLoopOp.getBody().getNumArguments();
+
+    // An acc.loop without block arguments has no induction range to expand
+    // (e.g. it has already been expanded). Without this guard the code below
+    // would index into an empty `loops` vector.
+    if (nbLoop == 0)
+      return;
+
+    // Gather original (non-privatized) induction variables.
+    llvm::SmallVector<mlir::Value> ivs = getOriginalInductionVars(accLoopOp);
+
+    // Remove block arguments in order to create loop-nest and move current
+    // body in the newly created loop nest.
+    accLoopOp.getBody().eraseArguments(0, nbLoop);
+    builder.setInsertionPointAfter(accLoopOp);
+
+    if (!isStructured) {
+      clearInductionRangesAndAttrs(builder, accLoopOp);
+      return;
+    }
+
+    llvm::SmallVector<mlir::Value> lbs, ubs, steps;
+    llvm::SmallVector<fir::DoLoopOp> loops;
+
+    // Create the loop nest, move the acc.loop body inside and move the loop
+    // nest inside the acc.loop again.
+    for (unsigned i = 0; i < nbLoop; ++i) {
+      bool isInnerLoop = i == (nbLoop - 1);
+
+      lbs.push_back(
+          builder.createConvert(loc, idxTy, accLoopOp.getLowerbound()[i]));
+      ubs.push_back(
+          builder.createConvert(loc, idxTy, accLoopOp.getUpperbound()[i]));
+      steps.push_back(
+          builder.createConvert(loc, idxTy, accLoopOp.getStep()[i]));
+      // The loop carries the induction variable value, in its original type,
+      // as an iteration argument initialized with the lower bound.
+      fir::DoLoopOp doLoopOp = builder.create<fir::DoLoopOp>(
+          loc, lbs[i], ubs[i], steps[i], /*unordered=*/false, finalCountValue,
+          mlir::ValueRange{accLoopOp.getLowerbound()[i]});
+      loops.push_back(doLoopOp);
+
+      if (isInnerLoop) {
+        // Move acc.loop body inside the newly created fir.do_loop.
+        accLoopOp.getBody().getTerminator()->erase();
+        doLoopOp.getRegion().takeBody(*accLoopOp.getLoopRegions().front());
+        // Recreate the block arguments: the block taken from the acc.loop had
+        // its arguments erased above.
+        doLoopOp.getBody()->addArgument(builder.getIndexType(), loc);
+        doLoopOp.getBody()->addArgument(accLoopOp.getLowerbound()[i].getType(),
+                                        loc);
+      } else {
+        // The next loop of the nest is created inside this one.
+        builder.setInsertionPointToStart(doLoopOp.getBody());
+      }
+    }
+
+    // Move the loop nest inside the acc.loop region.
+    mlir::Block *newAccLoopBlock =
+        builder.createBlock(accLoopOp.getLoopRegions().front());
+    loops[0].getOperation()->moveBefore(newAccLoopBlock,
+                                        newAccLoopBlock->end());
+
+    for (unsigned i = 0; i < nbLoop; ++i) {
+      // Store the carried value into the original induction variable at the
+      // top of each loop body.
+      builder.setInsertionPointToStart(loops[i].getBody());
+      builder.create<fir::StoreOp>(loc, loops[i].getBody()->getArgument(1),
+                                   ivs[i]);
+
+      builder.setInsertionPointToEnd(loops[i].getBody());
+      llvm::SmallVector<mlir::Value, 2> results;
+      if (finalCountValue)
+        results.push_back(builder.create<mlir::arith::AddIOp>(
+            loc, loops[i].getInductionVar(), loops[i].getStep()));
+
+      // Step loopVariable to help optimizations such as vectorization.
+      // Induction variable elimination will clean up as necessary.
+      mlir::Value convStep = builder.create<fir::ConvertOp>(
+          loc, accLoopOp.getStep()[i].getType(), loops[i].getStep());
+      mlir::Value loopVar = builder.create<fir::LoadOp>(loc, ivs[i]);
+      results.push_back(
+          builder.create<mlir::arith::AddIOp>(loc, loopVar, convStep));
+      builder.create<fir::ResultOp>(loc, results);
+
+      // Convert ops have been created outside of the acc.loop operation. They
+      // need to be moved back before their uses.
+      lbs[i].getDefiningOp()->moveBefore(loops[i].getOperation());
+      ubs[i].getDefiningOp()->moveBefore(loops[i].getOperation());
+      steps[i].getDefiningOp()->moveBefore(loops[i].getOperation());
+    }
+
+    builder.setInsertionPointToEnd(newAccLoopBlock);
+    builder.create<mlir::acc::YieldOp>(loc);
+
+    clearInductionRangesAndAttrs(builder, accLoopOp);
+  });
+}
+
+/// Factory for the pass that expands `acc.loop` induction ranges into a
+/// `fir.do_loop` nest.
+std::unique_ptr<mlir::Pass> fir::createOpenACCLoopExpandPass() {
+  return std::make_unique<LoopExpand>();
+}
diff --git a/flang/test/Fir/OpenACC/loop-expand.f90 b/flang/test/Fir/OpenACC/loop-expand.f90
new file mode 100644
index 000000000000000..2efb2b2bd753355
--- /dev/null
+++ b/flang/test/Fir/OpenACC/loop-expand.f90
@@ -0,0 +1,118 @@
+! RUN: bbc -fopenacc -emit-hlfir %s -o - | fir-opt --split-input-file --acc-loop-expand | FileCheck %s
+
+! A single loop is expanded into one fir.do_loop inside the acc.loop region.
+subroutine singleloop(a)
+  real :: a(:)
+  integer :: i
+  a = 0.0
+
+  !$acc loop
+  do i = 1, 10
+    a(i) = i
+  end do
+end subroutine
+! CHECK-LABEL: func.func @_QPsingleloop
+! CHECK: %[[I:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFsingleloopEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: acc.loop private
+! CHECK: %[[LB0:.*]] = fir.convert %c1_i32 : (i32) -> index
+! CHECK: %[[UB0:.*]] = fir.convert %c10_i32 : (i32) -> index
+! CHECK: %[[STEP0:.*]] = fir.convert %c1_i32_0 : (i32) -> index
+! CHECK: %{{.*}} = fir.do_loop %[[ARG1:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] iter_args(%[[ARG2:.*]] = %{{.*}}) -> (index, i32) {
+! CHECK: fir.store %[[ARG2]] to %[[I]]#1 : !fir.ref<i32>
+! CHECK: %[[INCR1:.*]] = arith.addi %[[ARG1]], %[[STEP0]] : index
+! CHECK: %[[CONV_STEP:.*]] = fir.convert %[[STEP0]] : (index) -> i32
+! CHECK: %[[LOAD_I:.*]] = fir.load %[[I]]#1 : !fir.ref<i32>
+! CHECK: %[[INCR2:.*]] = arith.addi %[[LOAD_I]], %[[CONV_STEP]] : i32
+! CHECK: fir.result %[[INCR1]], %[[INCR2]] : index, i32
+! CHECK: }
+! CHECK: acc.yield
+! CHECK: }
+
+! Without a collapse clause only the outer loop is associated with the
+! acc.loop; the inner loop is only checked to be a fir.do_loop.
+subroutine single_loop_with_nest(a)
+  real :: a(:,:)
+  integer :: i, j
+  a = 0.0
+
+  !$acc loop
+  do i = 1, 10
+    do j = 1, 10
+      a(i, j) = i
+    end do
+  end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPsingle_loop_with_nest
+! CHECK: %[[I:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFsingle_loop_with_nestEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: acc.loop private
+! CHECK: %[[LB0:.*]] = fir.convert %c1_i32 : (i32) -> index
+! CHECK: %[[UB0:.*]] = fir.convert %c10_i32 : (i32) -> index
+! CHECK: %[[STEP0:.*]] = fir.convert %c1_i32_0 : (i32) -> index
+! CHECK: %{{.*}} = fir.do_loop %[[ARG1:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] iter_args(%[[ARG2:.*]] = %{{.*}}) -> (index, i32) {
+! CHECK: fir.store %[[ARG2]] to %[[I]]#1 : !fir.ref<i32>
+! CHECK: fir.do_loop
+! CHECK: }
+! CHECK: %[[INCR1:.*]] = arith.addi %[[ARG1]], %[[STEP0]] : index
+! CHECK: %[[CONV_STEP:.*]] = fir.convert %[[STEP0]] : (index) -> i32
+! CHECK: %[[LOAD_I:.*]] = fir.load %[[I]]#1 : !fir.ref<i32>
+! CHECK: %[[INCR2:.*]] = arith.addi %[[LOAD_I]], %[[CONV_STEP]] : i32
+! CHECK: fir.result %[[INCR1]], %[[INCR2]] : index, i32
+! CHECK: }
+! CHECK: acc.yield
+! CHECK: }
+
+! With collapse(2) both loops are associated with the acc.loop and are
+! expanded into a nest of two fir.do_loop operations.
+subroutine loop_with_nest(a)
+  real :: a(:,:)
+  integer :: i, j
+  a = 0.0
+
+  !$acc loop collapse(2)
+  do i = 1, 10
+    do j = 1, 10
+      a(i, j) = i
+    end do
+  end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPloop_with_nest
+! CHECK: %[[I:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFloop_with_nestEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: %[[J:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "_QFloop_with_nestEj"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)
+! CHECK: acc.loop private(@privatization_ref_i32 -> %{{.*}} : !fir.ref<i32>, @privatization_ref_i32 -> %{{.*}} : !fir.ref<i32>) {
+! CHECK: %[[LB0:.*]] = fir.convert %{{.*}} : (i32) -> index
+! CHECK: %[[UB0:.*]] = fir.convert %{{.*}} : (i32) -> index
+! CHECK: %[[STEP0:.*]] = fir.convert %{{.*}} : (i32) -> index
+! CHECK: %{{.*}}:2 = fir.do_loop %[[ARG1:.*]] = %[[LB0]] to %[[UB0]] step %[[STEP0]] iter_args(%[[ARG2:.*]] = %{{.*}}) -> (index, i32) {
+! CHECK: fir.store %[[ARG2]] to %[[I]]#1 : !fir.ref<i32>
+! CHECK: %[[LB1:.*]] = fir.convert %{{.*}} : (i32) -> index
+! CHECK: %[[UB1:.*]] = fir.convert %{{.*}} : (i32) -> index
+! CHECK: %[[STEP1:.*]] = fir.convert %{{.*}} : (i32) -> index
+! CHECK: %{{.*}}:2 = fir.do_loop %[[ARG3:.*]] = %[[LB1]] to %[[UB1]] step %[[STEP1]] iter_args(%[[ARG4:.*]] = %{{.*}}) -> (index, i32) {
+! CHECK: fir.store %[[ARG4]] to %[[J]]#1 : !fir.ref<i32>
+
+! CHECK: %[[INCR1:.*]] = arith.addi %[[ARG3]], %[[STEP1]] : index
+! CHECK: %[[CONV_STEP1:.*]] = fir.convert %[[STEP1]] : (index) -> i32
+! CHECK: %[[LOAD_J:.*]] = fir.load %[[J]]#1 : !fir.ref<i32>
+! CHECK: %[[INCR2:.*]] = arith.addi %[[LOAD_J]], %[[CONV_STEP1]] : i32
+! CHECK: fir.result %[[INCR1]], %[[INCR2]] : index, i32
+! CHECK: }
+! CHECK: %[[INCR1:.*]] = arith.addi %[[ARG1]], %[[STEP0]] : index
+! CHECK: %[[CONV_STEP0:.*]] = fir.convert %[[STEP0]] : (index) -> i32
+! CHECK: %[[LOAD_I:.*]] = fir.load %[[I]]#1 : !fir.ref<i32>
+! CHECK: %[[INCR2:.*]] = arith.addi %[[LOAD_I]], %[[CONV_STEP0]] : i32
+! CHECK: fir.result %[[INCR1]], %[[INCR2]] : index, i32
+! CHECK: }
+! CHECK: acc.yield
+! CHECK: }
+
+! The pass only builds a fir.do_loop nest when the acc.loop region has a
+! single block. This loop is presumably lowered to a multi-block region
+! because of the STOP statement (to be confirmed); the test only checks that
+! the acc.loop and its private clause are still present.
+subroutine loop_unstructured(a)
+ real :: a(:)
+ integer :: i
+ a = 0.0
+
+ !$acc loop
+ do i = 1, 10
+ if (a(i) > 0.0) stop 'stop'
+ a(i) = i
+ end do
+end subroutine
+
+! CHECK-LABEL: func.func @_QPloop_unstructured
+! CHECK: acc.loop private(@privatization_ref_i32 -> %{{.*}} : !fir.ref<i32>)
diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
index 391e77e0c4081a3..62ab100847619a2 100644
--- a/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
+++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
@@ -1218,6 +1218,8 @@ def OpenACC_LoopOp : OpenACC_Op<"loop",
/// The i-th data operand passed.
Value getDataOperand(unsigned i);
+
+ Block &getBody() { return getLoopRegions().front()->front(); }
}];
let hasCustomAssemblyFormat = 1;
@@ -1237,7 +1239,7 @@ def OpenACC_LoopOp : OpenACC_Op<"loop",
)
$region
( `(` type($results)^ `)` )?
- attr-dict-with-keyword
+ attr-dict-with-keyword
}];
let hasVerifier = 1;
|
1b3af42
to
a4f5168
Compare
You can test this locally with the following command:
git-clang-format --diff 3ed940ac3dac03d044a8d1e51005cec84dd128f9 b8f83f1bf14ba6a55bfc68bfb5f718b24cb67baf -- flang/lib/Optimizer/Transforms/OpenACCLoopExpand.cpp flang/include/flang/Optimizer/Transforms/Passes.h
View the diff from clang-format here.
diff --git a/flang/lib/Optimizer/Transforms/OpenACCLoopExpand.cpp b/flang/lib/Optimizer/Transforms/OpenACCLoopExpand.cpp
index 84a2fa465e..46bea72f1b 100644
--- a/flang/lib/Optimizer/Transforms/OpenACCLoopExpand.cpp
+++ b/flang/lib/Optimizer/Transforms/OpenACCLoopExpand.cpp
@@ -121,7 +121,8 @@ static void clearIVPrivatizations(llvm::SmallVector<mlir::Value> &ivs,
}
static mlir::Value createConvertOrConstant(fir::FirOpBuilder &builder,
- mlir::Location loc, mlir::Type ty, mlir::Value originalValue) {
+ mlir::Location loc, mlir::Type ty,
+ mlir::Value originalValue) {
if (auto intValue = fir::getIntIfConstant(originalValue))
return builder.createIntegerConstant(loc, ty, *intValue);
return builder.createConvert(loc, ty, originalValue);
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks great!
06ede88
to
72cfb19
Compare
72cfb19
to
b8f83f1
Compare
acc.loop operations get a new design where the induction ranges can be part of the operation directly. The operation can also be a simple wrapper around a loop nest. This patch adds a loop-expand pass that can transform an acc.loop operation with induction ranges into an acc.loop operation with a fir.do_loop nest inside.
This patch depends on two patches that update the design and lowering:
#67355
#65417