Skip to content

Commit

Permalink
[Flang] Move genMinMaxlocReductionLoop to a common location.
Browse files Browse the repository at this point in the history
The shared library build doesn't like references to genMinMaxlocReductionLoop,
in Optimizer/Transforms, from HLFIR/Optimizer/Transforms. For the moment I've
moved the code to the header file where it can be shared, like other methods in
Utils.h.
  • Loading branch information
davemgreen committed Jan 25, 2024
1 parent b0b7be2 commit 202917f
Show file tree
Hide file tree
Showing 2 changed files with 128 additions and 135 deletions.
135 changes: 128 additions & 7 deletions flang/include/flang/Optimizer/Support/Utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "flang/Optimizer/Builder/Todo.h"
#include "flang/Optimizer/Dialect/FIROps.h"
#include "flang/Optimizer/Dialect/FIRType.h"
#include "flang/Optimizer/HLFIR/HLFIRDialect.h"
#include "flang/Optimizer/Support/FatalError.h"
#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"
Expand Down Expand Up @@ -144,13 +145,133 @@ using AddrGeneratorTy = llvm::function_ref<mlir::Value(
mlir::Value)>;

// Produces a loop nest for a Minloc or Maxloc intrinsic reduction.
//
// Parameters:
//   builder   - FIR builder used to create all operations.
//   array     - the input array being reduced.
//   initVal   - callback producing the initial reduction value.
//   genBody   - callback producing the innermost loop body.
//   getAddrFn - callback producing the address of a result element.
//   rank      - number of dimensions of `array`.
//   elementType  - element type of `array`.
//   loc          - location attached to the generated operations.
//   maskElemType - element type of the mask argument.
//   resultArr    - array receiving the resulting location.
//   maskMayBeLogicalScalar - whether the mask may be a scalar logical.
void genMinMaxlocReductionLoop(fir::FirOpBuilder &builder, mlir::Value array,
InitValGeneratorTy initVal,
MinlocBodyOpGeneratorTy genBody,
fir::AddrGeneratorTy getAddrFn, unsigned rank,
mlir::Type elementType, mlir::Location loc,
mlir::Type maskElemType, mlir::Value resultArr,
bool maskMayBeLogicalScalar);
// Emits, at the builder's current insertion point, a nest of `rank`
// fir::DoLoopOp operations over `array` that carries a single reduction
// value as a loop iteration argument. The innermost body is produced by
// `genBody`. After the nest, a guarded sequence is emitted that stores the
// value 1 into each of the `rank` elements of `resultArr` (addressed through
// `getAddrFn`) when the flag temporary equals 1 and the reduction result
// still equals the value produced by `initVal`.
//
// `maskElemType` is not referenced in this function body.
// On return the insertion point is placed after the outer flag-check
// fir::IfOp created here.
inline void genMinMaxlocReductionLoop(
fir::FirOpBuilder &builder, mlir::Value array,
fir::InitValGeneratorTy initVal, fir::MinlocBodyOpGeneratorTy genBody,
fir::AddrGeneratorTy getAddrFn, unsigned rank, mlir::Type elementType,
mlir::Location loc, mlir::Type maskElemType, mlir::Value resultArr,
bool maskMayBeLogicalScalar) {
mlir::IndexType idxTy = builder.getIndexType();

mlir::Value zeroIdx = builder.createIntegerConstant(loc, idxTy, 0);

// Convert the input to a box of a `rank`-dimensional sequence whose extents
// are all unknown, so the bounds are read from the box below.
fir::SequenceType::Shape flatShape(rank,
fir::SequenceType::getUnknownExtent());
mlir::Type arrTy = fir::SequenceType::get(flatShape, elementType);
mlir::Type boxArrTy = fir::BoxType::get(arrTy);
array = builder.create<fir::ConvertOp>(loc, boxArrTy, array);

// The flag temporary has the result's element type and starts at 0. It is
// handed to genBody and read back after the loop nest.
// NOTE(review): presumably genBody stores 1 into it once an element has been
// considered - confirm against the body generators.
mlir::Type resultElemType = hlfir::getFortranElementType(resultArr.getType());
mlir::Value flagSet = builder.createIntegerConstant(loc, resultElemType, 1);
mlir::Value zero = builder.createIntegerConstant(loc, resultElemType, 0);
mlir::Value flagRef = builder.createTemporary(loc, resultElemType);
builder.create<fir::StoreOp>(loc, zero, flagRef);

// Initial reduction value; it becomes the iteration argument of the
// outermost loop.
mlir::Value init = initVal(builder, loc, elementType);
llvm::SmallVector<mlir::Value, Fortran::common::maxRank> bounds;

assert(rank > 0 && "rank cannot be zero");
mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1);

// Compute all the upper bounds before the loop nest.
// It is not strictly necessary for performance, since the loop nest
// does not have any store operations and any LICM optimization
// should be able to optimize the redundancy.
for (unsigned i = 0; i < rank; ++i) {
mlir::Value dimIdx = builder.createIntegerConstant(loc, idxTy, i);
auto dims =
builder.create<fir::BoxDimsOp>(loc, idxTy, idxTy, idxTy, array, dimIdx);
// Result #1 of the BoxDimsOp is used as the length of dimension i.
mlir::Value len = dims.getResult(1);
// We use C indexing here, so len-1 as loopcount
mlir::Value loopCount = builder.create<mlir::arith::SubIOp>(loc, len, one);
bounds.push_back(loopCount);
}
// Create a loop nest consisting of fir::DoLoopOp operations.
// Collect the loops' induction variables into indices array,
// which will be used in the innermost loop to load the input
// array's element.
// The loops are generated such that the innermost loop processes
// the 0 dimension.
llvm::SmallVector<mlir::Value, Fortran::common::maxRank> indices;
for (unsigned i = rank; 0 < i; --i) {
mlir::Value step = one;
mlir::Value loopCount = bounds[i - 1];
auto loop =
builder.create<fir::DoLoopOp>(loc, zeroIdx, loopCount, step, false,
/*finalCountValue=*/false, init);
// Inner loops start from this loop's iteration argument rather than from
// the value defined outside the nest.
init = loop.getRegionIterArgs()[0];
indices.push_back(loop.getInductionVar());
// Set insertion point to the loop body so that the next loop
// is inserted inside the current one.
builder.setInsertionPointToStart(loop.getBody());
}

// Reverse the indices such that they are ordered as:
// <dim-0-idx, dim-1-idx, ...>
std::reverse(indices.begin(), indices.end());
// Generate the innermost body; it returns the updated reduction value.
mlir::Value reductionVal =
genBody(builder, loc, elementType, array, flagRef, init, indices);

// Unwind the loop nest and insert ResultOp on each level
// to return the updated value of the reduction to the enclosing
// loops.
for (unsigned i = 0; i < rank; ++i) {
auto result = builder.create<fir::ResultOp>(loc, reductionVal);
// Proceed to the outer loop.
auto loop = mlir::cast<fir::DoLoopOp>(result->getParentOp());
reductionVal = loop.getResult(0);
// Set insertion point after the loop operation that we have
// just processed.
builder.setInsertionPointAfter(loop.getOperation());
}
// End of loop nest. The insertion point is after the outermost loop.
// If the loop nest sits directly inside a fir::IfOp, yield the reduction
// value from that IfOp and continue after it.
// NOTE(review): presumably that IfOp was opened by genBody/initVal for a
// scalar logical mask - confirm against the callers.
if (maskMayBeLogicalScalar) {
if (fir::IfOp ifOp =
mlir::dyn_cast<fir::IfOp>(builder.getBlock()->getParentOp())) {
builder.create<fir::ResultOp>(loc, reductionVal);
builder.setInsertionPointAfter(ifOp);
// Redefine flagSet to escape scope of ifOp
flagSet = builder.createIntegerConstant(loc, resultElemType, 1);
reductionVal = ifOp.getResult(0);
}
}

// Check for case where array was full of max values.
// flag will be 0 if mask was never true, 1 if mask was true at some point,
// this is needed to avoid catching cases where we didn't access any elements
// e.g. mask=.FALSE.
mlir::Value flagValue =
builder.create<fir::LoadOp>(loc, resultElemType, flagRef);
mlir::Value flagCmp = builder.create<mlir::arith::CmpIOp>(
loc, mlir::arith::CmpIPredicate::eq, flagValue, flagSet);
fir::IfOp ifMaskTrueOp =
builder.create<fir::IfOp>(loc, flagCmp, /*withElseRegion=*/false);
builder.setInsertionPointToStart(&ifMaskTrueOp.getThenRegion().front());

// Inside the flag check: compare the final reduction value with a freshly
// generated initial value, using an ordered-equal float compare for float
// element types and an integer equality compare otherwise.
mlir::Value testInit = initVal(builder, loc, elementType);
fir::IfOp ifMinSetOp;
if (elementType.isa<mlir::FloatType>()) {
mlir::Value cmp = builder.create<mlir::arith::CmpFOp>(
loc, mlir::arith::CmpFPredicate::OEQ, testInit, reductionVal);
ifMinSetOp = builder.create<fir::IfOp>(loc, cmp,
/*withElseRegion*/ false);
} else {
mlir::Value cmp = builder.create<mlir::arith::CmpIOp>(
loc, mlir::arith::CmpIPredicate::eq, testInit, reductionVal);
ifMinSetOp = builder.create<fir::IfOp>(loc, cmp,
/*withElseRegion*/ false);
}
builder.setInsertionPointToStart(&ifMinSetOp.getThenRegion().front());

// Load output array with 1s instead of 0s
// (one store per dimension, i.e. `rank` result elements are written).
for (unsigned int i = 0; i < rank; ++i) {
mlir::Value index = builder.createIntegerConstant(loc, idxTy, i);
mlir::Value resultElemAddr =
getAddrFn(builder, loc, resultElemType, resultArr, index);
builder.create<fir::StoreOp>(loc, flagSet, resultElemAddr);
}
// Leave the insertion point after the outer flag-check IfOp so callers
// continue emitting code after the whole sequence.
builder.setInsertionPointAfter(ifMaskTrueOp);
}

} // namespace fir

Expand Down
128 changes: 0 additions & 128 deletions flang/lib/Optimizer/Transforms/SimplifyIntrinsics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -353,134 +353,6 @@ genReductionLoop(fir::FirOpBuilder &builder, mlir::func::FuncOp &funcOp,
builder.create<mlir::func::ReturnOp>(loc, results[resultIndex]);
}

// Emits, at the builder's current insertion point, a nest of `rank`
// fir::DoLoopOp operations over `array` that carries a single reduction
// value as a loop iteration argument. The innermost body is produced by
// `genBody`. After the nest, a guarded sequence is emitted that stores the
// value 1 into each of the `rank` elements of `resultArr` (addressed through
// `getAddrFn`) when the flag temporary equals 1 and the reduction result
// still equals the value produced by `initVal`.
//
// `maskElemType` is not referenced in this function body.
// On return the insertion point is placed after the outer flag-check
// fir::IfOp created here.
void fir::genMinMaxlocReductionLoop(
fir::FirOpBuilder &builder, mlir::Value array,
fir::InitValGeneratorTy initVal, fir::MinlocBodyOpGeneratorTy genBody,
fir::AddrGeneratorTy getAddrFn, unsigned rank, mlir::Type elementType,
mlir::Location loc, mlir::Type maskElemType, mlir::Value resultArr,
bool maskMayBeLogicalScalar) {
mlir::IndexType idxTy = builder.getIndexType();

mlir::Value zeroIdx = builder.createIntegerConstant(loc, idxTy, 0);

// Convert the input to a box of a `rank`-dimensional sequence whose extents
// are all unknown, so the bounds are read from the box below.
fir::SequenceType::Shape flatShape(rank,
fir::SequenceType::getUnknownExtent());
mlir::Type arrTy = fir::SequenceType::get(flatShape, elementType);
mlir::Type boxArrTy = fir::BoxType::get(arrTy);
array = builder.create<fir::ConvertOp>(loc, boxArrTy, array);

// The flag temporary has the result's element type and starts at 0. It is
// handed to genBody and read back after the loop nest.
// NOTE(review): presumably genBody stores 1 into it once an element has been
// considered - confirm against the body generators.
mlir::Type resultElemType = hlfir::getFortranElementType(resultArr.getType());
mlir::Value flagSet = builder.createIntegerConstant(loc, resultElemType, 1);
mlir::Value zero = builder.createIntegerConstant(loc, resultElemType, 0);
mlir::Value flagRef = builder.createTemporary(loc, resultElemType);
builder.create<fir::StoreOp>(loc, zero, flagRef);

// Initial reduction value; it becomes the iteration argument of the
// outermost loop.
mlir::Value init = initVal(builder, loc, elementType);
llvm::SmallVector<mlir::Value, Fortran::common::maxRank> bounds;

assert(rank > 0 && "rank cannot be zero");
mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1);

// Compute all the upper bounds before the loop nest.
// It is not strictly necessary for performance, since the loop nest
// does not have any store operations and any LICM optimization
// should be able to optimize the redundancy.
for (unsigned i = 0; i < rank; ++i) {
mlir::Value dimIdx = builder.createIntegerConstant(loc, idxTy, i);
auto dims =
builder.create<fir::BoxDimsOp>(loc, idxTy, idxTy, idxTy, array, dimIdx);
// Result #1 of the BoxDimsOp is used as the length of dimension i.
mlir::Value len = dims.getResult(1);
// We use C indexing here, so len-1 as loopcount
mlir::Value loopCount = builder.create<mlir::arith::SubIOp>(loc, len, one);
bounds.push_back(loopCount);
}
// Create a loop nest consisting of fir::DoLoopOp operations.
// Collect the loops' induction variables into indices array,
// which will be used in the innermost loop to load the input
// array's element.
// The loops are generated such that the innermost loop processes
// the 0 dimension.
llvm::SmallVector<mlir::Value, Fortran::common::maxRank> indices;
for (unsigned i = rank; 0 < i; --i) {
mlir::Value step = one;
mlir::Value loopCount = bounds[i - 1];
auto loop =
builder.create<fir::DoLoopOp>(loc, zeroIdx, loopCount, step, false,
/*finalCountValue=*/false, init);
// Inner loops start from this loop's iteration argument rather than from
// the value defined outside the nest.
init = loop.getRegionIterArgs()[0];
indices.push_back(loop.getInductionVar());
// Set insertion point to the loop body so that the next loop
// is inserted inside the current one.
builder.setInsertionPointToStart(loop.getBody());
}

// Reverse the indices such that they are ordered as:
// <dim-0-idx, dim-1-idx, ...>
std::reverse(indices.begin(), indices.end());
// Generate the innermost body; it returns the updated reduction value.
mlir::Value reductionVal =
genBody(builder, loc, elementType, array, flagRef, init, indices);

// Unwind the loop nest and insert ResultOp on each level
// to return the updated value of the reduction to the enclosing
// loops.
for (unsigned i = 0; i < rank; ++i) {
auto result = builder.create<fir::ResultOp>(loc, reductionVal);
// Proceed to the outer loop.
auto loop = mlir::cast<fir::DoLoopOp>(result->getParentOp());
reductionVal = loop.getResult(0);
// Set insertion point after the loop operation that we have
// just processed.
builder.setInsertionPointAfter(loop.getOperation());
}
// End of loop nest. The insertion point is after the outermost loop.
// If the loop nest sits directly inside a fir::IfOp, yield the reduction
// value from that IfOp and continue after it.
// NOTE(review): presumably that IfOp was opened by genBody/initVal for a
// scalar logical mask - confirm against the callers.
if (maskMayBeLogicalScalar) {
if (fir::IfOp ifOp =
mlir::dyn_cast<fir::IfOp>(builder.getBlock()->getParentOp())) {
builder.create<fir::ResultOp>(loc, reductionVal);
builder.setInsertionPointAfter(ifOp);
// Redefine flagSet to escape scope of ifOp
flagSet = builder.createIntegerConstant(loc, resultElemType, 1);
reductionVal = ifOp.getResult(0);
}
}

// Check for case where array was full of max values.
// flag will be 0 if mask was never true, 1 if mask was true at some point,
// this is needed to avoid catching cases where we didn't access any elements
// e.g. mask=.FALSE.
mlir::Value flagValue =
builder.create<fir::LoadOp>(loc, resultElemType, flagRef);
mlir::Value flagCmp = builder.create<mlir::arith::CmpIOp>(
loc, mlir::arith::CmpIPredicate::eq, flagValue, flagSet);
fir::IfOp ifMaskTrueOp =
builder.create<fir::IfOp>(loc, flagCmp, /*withElseRegion=*/false);
builder.setInsertionPointToStart(&ifMaskTrueOp.getThenRegion().front());

// Inside the flag check: compare the final reduction value with a freshly
// generated initial value, using an ordered-equal float compare for float
// element types and an integer equality compare otherwise.
mlir::Value testInit = initVal(builder, loc, elementType);
fir::IfOp ifMinSetOp;
if (elementType.isa<mlir::FloatType>()) {
mlir::Value cmp = builder.create<mlir::arith::CmpFOp>(
loc, mlir::arith::CmpFPredicate::OEQ, testInit, reductionVal);
ifMinSetOp = builder.create<fir::IfOp>(loc, cmp,
/*withElseRegion*/ false);
} else {
mlir::Value cmp = builder.create<mlir::arith::CmpIOp>(
loc, mlir::arith::CmpIPredicate::eq, testInit, reductionVal);
ifMinSetOp = builder.create<fir::IfOp>(loc, cmp,
/*withElseRegion*/ false);
}
builder.setInsertionPointToStart(&ifMinSetOp.getThenRegion().front());

// Load output array with 1s instead of 0s
// (one store per dimension, i.e. `rank` result elements are written).
for (unsigned int i = 0; i < rank; ++i) {
mlir::Value index = builder.createIntegerConstant(loc, idxTy, i);
mlir::Value resultElemAddr =
getAddrFn(builder, loc, resultElemType, resultArr, index);
builder.create<fir::StoreOp>(loc, flagSet, resultElemAddr);
}
// Leave the insertion point after the outer flag-check IfOp so callers
// continue emitting code after the whole sequence.
builder.setInsertionPointAfter(ifMaskTrueOp);
}

static llvm::SmallVector<mlir::Value> nopLoopCond(fir::FirOpBuilder &builder,
mlir::Location loc,
mlir::Value reductionVal) {
Expand Down

0 comments on commit 202917f

Please sign in to comment.