-
Notifications
You must be signed in to change notification settings - Fork 11k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Flang] Move genMinMaxlocReductionLoop to Transforms/Utils.cpp #81380
Conversation
@llvm/pr-subscribers-flang-fir-hlfir Author: David Green (davemgreen) ChangesThis is one option for attempting to move genMinMaxlocReductionLoop to a better location. It moves it into Transforms and makes HLFIRTranforms depend upon FIRTransforms. It passes a build locally, both with and without -DBUILD_SHARED_LIBS, but that might not include all configurations. Full diff: https://github.com/llvm/llvm-project/pull/81380.diff 6 Files Affected:
diff --git a/flang/include/flang/Optimizer/Support/Utils.h b/flang/include/flang/Optimizer/Support/Utils.h
index 4e06bf8bafdc83..7a8a34c25ce95c 100644
--- a/flang/include/flang/Optimizer/Support/Utils.h
+++ b/flang/include/flang/Optimizer/Support/Utils.h
@@ -18,7 +18,6 @@
#include "flang/Optimizer/Builder/Todo.h"
#include "flang/Optimizer/Dialect/FIROps.h"
#include "flang/Optimizer/Dialect/FIRType.h"
-#include "flang/Optimizer/HLFIR/HLFIRDialect.h"
#include "flang/Optimizer/Support/FatalError.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
@@ -135,144 +134,6 @@ inline void intrinsicTypeTODO(fir::FirOpBuilder &builder, mlir::Type type,
" in " + intrinsicName);
}
-using MinlocBodyOpGeneratorTy = llvm::function_ref<mlir::Value(
- fir::FirOpBuilder &, mlir::Location, const mlir::Type &, mlir::Value,
- mlir::Value, mlir::Value, const llvm::SmallVectorImpl<mlir::Value> &)>;
-using InitValGeneratorTy = llvm::function_ref<mlir::Value(
- fir::FirOpBuilder &, mlir::Location, const mlir::Type &)>;
-using AddrGeneratorTy = llvm::function_ref<mlir::Value(
- fir::FirOpBuilder &, mlir::Location, const mlir::Type &, mlir::Value,
- mlir::Value)>;
-
-// Produces a loop nest for a Minloc intrinsic.
-inline void genMinMaxlocReductionLoop(
- fir::FirOpBuilder &builder, mlir::Value array,
- fir::InitValGeneratorTy initVal, fir::MinlocBodyOpGeneratorTy genBody,
- fir::AddrGeneratorTy getAddrFn, unsigned rank, mlir::Type elementType,
- mlir::Location loc, mlir::Type maskElemType, mlir::Value resultArr,
- bool maskMayBeLogicalScalar) {
- mlir::IndexType idxTy = builder.getIndexType();
-
- mlir::Value zeroIdx = builder.createIntegerConstant(loc, idxTy, 0);
-
- fir::SequenceType::Shape flatShape(rank,
- fir::SequenceType::getUnknownExtent());
- mlir::Type arrTy = fir::SequenceType::get(flatShape, elementType);
- mlir::Type boxArrTy = fir::BoxType::get(arrTy);
- array = builder.create<fir::ConvertOp>(loc, boxArrTy, array);
-
- mlir::Type resultElemType = hlfir::getFortranElementType(resultArr.getType());
- mlir::Value flagSet = builder.createIntegerConstant(loc, resultElemType, 1);
- mlir::Value zero = builder.createIntegerConstant(loc, resultElemType, 0);
- mlir::Value flagRef = builder.createTemporary(loc, resultElemType);
- builder.create<fir::StoreOp>(loc, zero, flagRef);
-
- mlir::Value init = initVal(builder, loc, elementType);
- llvm::SmallVector<mlir::Value, Fortran::common::maxRank> bounds;
-
- assert(rank > 0 && "rank cannot be zero");
- mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1);
-
- // Compute all the upper bounds before the loop nest.
- // It is not strictly necessary for performance, since the loop nest
- // does not have any store operations and any LICM optimization
- // should be able to optimize the redundancy.
- for (unsigned i = 0; i < rank; ++i) {
- mlir::Value dimIdx = builder.createIntegerConstant(loc, idxTy, i);
- auto dims =
- builder.create<fir::BoxDimsOp>(loc, idxTy, idxTy, idxTy, array, dimIdx);
- mlir::Value len = dims.getResult(1);
- // We use C indexing here, so len-1 as loopcount
- mlir::Value loopCount = builder.create<mlir::arith::SubIOp>(loc, len, one);
- bounds.push_back(loopCount);
- }
- // Create a loop nest consisting of OP operations.
- // Collect the loops' induction variables into indices array,
- // which will be used in the innermost loop to load the input
- // array's element.
- // The loops are generated such that the innermost loop processes
- // the 0 dimension.
- llvm::SmallVector<mlir::Value, Fortran::common::maxRank> indices;
- for (unsigned i = rank; 0 < i; --i) {
- mlir::Value step = one;
- mlir::Value loopCount = bounds[i - 1];
- auto loop =
- builder.create<fir::DoLoopOp>(loc, zeroIdx, loopCount, step, false,
- /*finalCountValue=*/false, init);
- init = loop.getRegionIterArgs()[0];
- indices.push_back(loop.getInductionVar());
- // Set insertion point to the loop body so that the next loop
- // is inserted inside the current one.
- builder.setInsertionPointToStart(loop.getBody());
- }
-
- // Reverse the indices such that they are ordered as:
- // <dim-0-idx, dim-1-idx, ...>
- std::reverse(indices.begin(), indices.end());
- mlir::Value reductionVal =
- genBody(builder, loc, elementType, array, flagRef, init, indices);
-
- // Unwind the loop nest and insert ResultOp on each level
- // to return the updated value of the reduction to the enclosing
- // loops.
- for (unsigned i = 0; i < rank; ++i) {
- auto result = builder.create<fir::ResultOp>(loc, reductionVal);
- // Proceed to the outer loop.
- auto loop = mlir::cast<fir::DoLoopOp>(result->getParentOp());
- reductionVal = loop.getResult(0);
- // Set insertion point after the loop operation that we have
- // just processed.
- builder.setInsertionPointAfter(loop.getOperation());
- }
- // End of loop nest. The insertion point is after the outermost loop.
- if (maskMayBeLogicalScalar) {
- if (fir::IfOp ifOp =
- mlir::dyn_cast<fir::IfOp>(builder.getBlock()->getParentOp())) {
- builder.create<fir::ResultOp>(loc, reductionVal);
- builder.setInsertionPointAfter(ifOp);
- // Redefine flagSet to escape scope of ifOp
- flagSet = builder.createIntegerConstant(loc, resultElemType, 1);
- reductionVal = ifOp.getResult(0);
- }
- }
-
- // Check for case where array was full of max values.
- // flag will be 0 if mask was never true, 1 if mask was true as some point,
- // this is needed to avoid catching cases where we didn't access any elements
- // e.g. mask=.FALSE.
- mlir::Value flagValue =
- builder.create<fir::LoadOp>(loc, resultElemType, flagRef);
- mlir::Value flagCmp = builder.create<mlir::arith::CmpIOp>(
- loc, mlir::arith::CmpIPredicate::eq, flagValue, flagSet);
- fir::IfOp ifMaskTrueOp =
- builder.create<fir::IfOp>(loc, flagCmp, /*withElseRegion=*/false);
- builder.setInsertionPointToStart(&ifMaskTrueOp.getThenRegion().front());
-
- mlir::Value testInit = initVal(builder, loc, elementType);
- fir::IfOp ifMinSetOp;
- if (elementType.isa<mlir::FloatType>()) {
- mlir::Value cmp = builder.create<mlir::arith::CmpFOp>(
- loc, mlir::arith::CmpFPredicate::OEQ, testInit, reductionVal);
- ifMinSetOp = builder.create<fir::IfOp>(loc, cmp,
- /*withElseRegion*/ false);
- } else {
- mlir::Value cmp = builder.create<mlir::arith::CmpIOp>(
- loc, mlir::arith::CmpIPredicate::eq, testInit, reductionVal);
- ifMinSetOp = builder.create<fir::IfOp>(loc, cmp,
- /*withElseRegion*/ false);
- }
- builder.setInsertionPointToStart(&ifMinSetOp.getThenRegion().front());
-
- // Load output array with 1s instead of 0s
- for (unsigned int i = 0; i < rank; ++i) {
- mlir::Value index = builder.createIntegerConstant(loc, idxTy, i);
- mlir::Value resultElemAddr =
- getAddrFn(builder, loc, resultElemType, resultArr, index);
- builder.create<fir::StoreOp>(loc, flagSet, resultElemAddr);
- }
- builder.setInsertionPointAfter(ifMaskTrueOp);
-}
-
inline fir::CUDADataAttributeAttr
getCUDADataAttribute(mlir::MLIRContext *mlirContext,
std::optional<Fortran::common::CUDADataAttr> cudaAttr) {
diff --git a/flang/include/flang/Optimizer/Transforms/Utils.h b/flang/include/flang/Optimizer/Transforms/Utils.h
new file mode 100644
index 00000000000000..49a616fb40fd5e
--- /dev/null
+++ b/flang/include/flang/Optimizer/Transforms/Utils.h
@@ -0,0 +1,38 @@
+//===-- Optimizer/Transforms/Utils.h ----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef FORTRAN_OPTIMIZER_TRANSFORMS_UTILS_H
+#define FORTRAN_OPTIMIZER_TRANSFORMS_UTILS_H
+
+namespace fir {
+
+using MinlocBodyOpGeneratorTy = llvm::function_ref<mlir::Value(
+ fir::FirOpBuilder &, mlir::Location, const mlir::Type &, mlir::Value,
+ mlir::Value, mlir::Value, const llvm::SmallVectorImpl<mlir::Value> &)>;
+using InitValGeneratorTy = llvm::function_ref<mlir::Value(
+ fir::FirOpBuilder &, mlir::Location, const mlir::Type &)>;
+using AddrGeneratorTy = llvm::function_ref<mlir::Value(
+ fir::FirOpBuilder &, mlir::Location, const mlir::Type &, mlir::Value,
+ mlir::Value)>;
+
+// Produces a loop nest for a Minloc intrinsic.
+void genMinMaxlocReductionLoop(fir::FirOpBuilder &builder, mlir::Value array,
+ fir::InitValGeneratorTy initVal,
+ fir::MinlocBodyOpGeneratorTy genBody,
+ fir::AddrGeneratorTy getAddrFn, unsigned rank,
+ mlir::Type elementType, mlir::Location loc,
+ mlir::Type maskElemType, mlir::Value resultArr,
+ bool maskMayBeLogicalScalar);
+
+} // namespace fir
+
+#endif // FORTRAN_OPTIMIZER_TRANSFORMS_UTILS_H
diff --git a/flang/lib/Optimizer/Dialect/CMakeLists.txt b/flang/lib/Optimizer/Dialect/CMakeLists.txt
index 58a4276224b34c..745439b7e1e5e8 100644
--- a/flang/lib/Optimizer/Dialect/CMakeLists.txt
+++ b/flang/lib/Optimizer/Dialect/CMakeLists.txt
@@ -13,7 +13,6 @@ add_flang_library(FIRDialect
CanonicalizationPatternsIncGen
MLIRIR
FIROpsIncGen
- HLFIROpsIncGen
intrinsics_gen
LINK_LIBS
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt b/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt
index 603b328a6823fe..ad569ce3b41f17 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt
+++ b/flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt
@@ -21,6 +21,7 @@ add_flang_library(HLFIRTransforms
FIRBuilder
FIRDialectSupport
FIRSupport
+ FIRTransforms
HLFIRDialect
MLIRIR
${dialect_libs}
diff --git a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
index 523671f0605f77..c2512c7df32f46 100644
--- a/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
+++ b/flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp
@@ -20,7 +20,7 @@
#include "flang/Optimizer/HLFIR/HLFIRDialect.h"
#include "flang/Optimizer/HLFIR/HLFIROps.h"
#include "flang/Optimizer/HLFIR/Passes.h"
-#include "flang/Optimizer/Support/Utils.h"
+#include "flang/Optimizer/Transforms/Utils.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/IR/Dominance.h"
#include "mlir/IR/PatternMatch.h"
diff --git a/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp b/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp
index b415463075d68f..86343e23c6e5db 100644
--- a/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp
+++ b/flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp
@@ -31,8 +31,8 @@
#include "flang/Optimizer/Dialect/FIRType.h"
#include "flang/Optimizer/Dialect/Support/FIRContext.h"
#include "flang/Optimizer/HLFIR/HLFIRDialect.h"
-#include "flang/Optimizer/Support/Utils.h"
#include "flang/Optimizer/Transforms/Passes.h"
+#include "flang/Optimizer/Transforms/Utils.h"
#include "flang/Runtime/entry-names.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/IR/Matchers.h"
@@ -558,6 +558,135 @@ static mlir::FunctionType genRuntimeMinlocType(fir::FirOpBuilder &builder,
{boxRefType, boxType, boxType}, {});
}
+// Produces a loop nest for a Minloc intrinsic.
+void fir::genMinMaxlocReductionLoop(
+ fir::FirOpBuilder &builder, mlir::Value array,
+ fir::InitValGeneratorTy initVal, fir::MinlocBodyOpGeneratorTy genBody,
+ fir::AddrGeneratorTy getAddrFn, unsigned rank, mlir::Type elementType,
+ mlir::Location loc, mlir::Type maskElemType, mlir::Value resultArr,
+ bool maskMayBeLogicalScalar) {
+ mlir::IndexType idxTy = builder.getIndexType();
+
+ mlir::Value zeroIdx = builder.createIntegerConstant(loc, idxTy, 0);
+
+ fir::SequenceType::Shape flatShape(rank,
+ fir::SequenceType::getUnknownExtent());
+ mlir::Type arrTy = fir::SequenceType::get(flatShape, elementType);
+ mlir::Type boxArrTy = fir::BoxType::get(arrTy);
+ array = builder.create<fir::ConvertOp>(loc, boxArrTy, array);
+
+ mlir::Type resultElemType = hlfir::getFortranElementType(resultArr.getType());
+ mlir::Value flagSet = builder.createIntegerConstant(loc, resultElemType, 1);
+ mlir::Value zero = builder.createIntegerConstant(loc, resultElemType, 0);
+ mlir::Value flagRef = builder.createTemporary(loc, resultElemType);
+ builder.create<fir::StoreOp>(loc, zero, flagRef);
+
+ mlir::Value init = initVal(builder, loc, elementType);
+ llvm::SmallVector<mlir::Value, Fortran::common::maxRank> bounds;
+
+ assert(rank > 0 && "rank cannot be zero");
+ mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1);
+
+ // Compute all the upper bounds before the loop nest.
+ // It is not strictly necessary for performance, since the loop nest
+ // does not have any store operations and any LICM optimization
+ // should be able to optimize the redundancy.
+ for (unsigned i = 0; i < rank; ++i) {
+ mlir::Value dimIdx = builder.createIntegerConstant(loc, idxTy, i);
+ auto dims =
+ builder.create<fir::BoxDimsOp>(loc, idxTy, idxTy, idxTy, array, dimIdx);
+ mlir::Value len = dims.getResult(1);
+ // We use C indexing here, so len-1 as loopcount
+ mlir::Value loopCount = builder.create<mlir::arith::SubIOp>(loc, len, one);
+ bounds.push_back(loopCount);
+ }
+ // Create a loop nest consisting of OP operations.
+ // Collect the loops' induction variables into indices array,
+ // which will be used in the innermost loop to load the input
+ // array's element.
+ // The loops are generated such that the innermost loop processes
+ // the 0 dimension.
+ llvm::SmallVector<mlir::Value, Fortran::common::maxRank> indices;
+ for (unsigned i = rank; 0 < i; --i) {
+ mlir::Value step = one;
+ mlir::Value loopCount = bounds[i - 1];
+ auto loop =
+ builder.create<fir::DoLoopOp>(loc, zeroIdx, loopCount, step, false,
+ /*finalCountValue=*/false, init);
+ init = loop.getRegionIterArgs()[0];
+ indices.push_back(loop.getInductionVar());
+ // Set insertion point to the loop body so that the next loop
+ // is inserted inside the current one.
+ builder.setInsertionPointToStart(loop.getBody());
+ }
+
+ // Reverse the indices such that they are ordered as:
+ // <dim-0-idx, dim-1-idx, ...>
+ std::reverse(indices.begin(), indices.end());
+ mlir::Value reductionVal =
+ genBody(builder, loc, elementType, array, flagRef, init, indices);
+
+ // Unwind the loop nest and insert ResultOp on each level
+ // to return the updated value of the reduction to the enclosing
+ // loops.
+ for (unsigned i = 0; i < rank; ++i) {
+ auto result = builder.create<fir::ResultOp>(loc, reductionVal);
+ // Proceed to the outer loop.
+ auto loop = mlir::cast<fir::DoLoopOp>(result->getParentOp());
+ reductionVal = loop.getResult(0);
+ // Set insertion point after the loop operation that we have
+ // just processed.
+ builder.setInsertionPointAfter(loop.getOperation());
+ }
+ // End of loop nest. The insertion point is after the outermost loop.
+ if (maskMayBeLogicalScalar) {
+ if (fir::IfOp ifOp =
+ mlir::dyn_cast<fir::IfOp>(builder.getBlock()->getParentOp())) {
+ builder.create<fir::ResultOp>(loc, reductionVal);
+ builder.setInsertionPointAfter(ifOp);
+ // Redefine flagSet to escape scope of ifOp
+ flagSet = builder.createIntegerConstant(loc, resultElemType, 1);
+ reductionVal = ifOp.getResult(0);
+ }
+ }
+
+ // Check for case where array was full of max values.
+ // flag will be 0 if mask was never true, 1 if mask was true as some point,
+ // this is needed to avoid catching cases where we didn't access any elements
+ // e.g. mask=.FALSE.
+ mlir::Value flagValue =
+ builder.create<fir::LoadOp>(loc, resultElemType, flagRef);
+ mlir::Value flagCmp = builder.create<mlir::arith::CmpIOp>(
+ loc, mlir::arith::CmpIPredicate::eq, flagValue, flagSet);
+ fir::IfOp ifMaskTrueOp =
+ builder.create<fir::IfOp>(loc, flagCmp, /*withElseRegion=*/false);
+ builder.setInsertionPointToStart(&ifMaskTrueOp.getThenRegion().front());
+
+ mlir::Value testInit = initVal(builder, loc, elementType);
+ fir::IfOp ifMinSetOp;
+ if (elementType.isa<mlir::FloatType>()) {
+ mlir::Value cmp = builder.create<mlir::arith::CmpFOp>(
+ loc, mlir::arith::CmpFPredicate::OEQ, testInit, reductionVal);
+ ifMinSetOp = builder.create<fir::IfOp>(loc, cmp,
+ /*withElseRegion*/ false);
+ } else {
+ mlir::Value cmp = builder.create<mlir::arith::CmpIOp>(
+ loc, mlir::arith::CmpIPredicate::eq, testInit, reductionVal);
+ ifMinSetOp = builder.create<fir::IfOp>(loc, cmp,
+ /*withElseRegion*/ false);
+ }
+ builder.setInsertionPointToStart(&ifMinSetOp.getThenRegion().front());
+
+ // Load output array with 1s instead of 0s
+ for (unsigned int i = 0; i < rank; ++i) {
+ mlir::Value index = builder.createIntegerConstant(loc, idxTy, i);
+ mlir::Value resultElemAddr =
+ getAddrFn(builder, loc, resultElemType, resultArr, index);
+ builder.create<fir::StoreOp>(loc, flagSet, resultElemAddr);
+ }
+ builder.setInsertionPointAfter(ifMaskTrueOp);
+}
+
static void genRuntimeMinMaxlocBody(fir::FirOpBuilder &builder,
mlir::func::FuncOp &funcOp, bool isMax,
unsigned rank, int maskRank,
|
This looks like it does OK locally and in CI. Let me know if you think there is a better place for it. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you, @davemgreen. It seems okay that HLFIRTransforms
depends on FIRTransforms
.
This is one option for attempting to move genMinMaxlocReductionLoop to a better location. It moves it into Transforms and makes HLFIRTranforms depend upon FIRTransforms.
36b959b
to
3c533ae
Compare
Thanks. Let me know if this causes problems in some other configuration. |
This is one option for attempting to move genMinMaxlocReductionLoop to a better location. It moves it into Transforms and makes HLFIRTranforms depend upon FIRTransforms.
It passes a build locally, both with and without -DBUILD_SHARED_LIBS, but that might not include all configurations.