Skip to content

Commit

Permalink
[flang][OpenMP] lower simple array reductions (#84958)
Browse files Browse the repository at this point in the history
This has been tested with arrays with compile-time constant bounds.
Allocatable arrays and arrays with non-constant bounds are not yet
supported. User-defined reduction functions are also not yet supported.

The design is intended to work for arrays with non-constant bounds too
without a lot of extra work (mostly there are bugs in OpenMPIRBuilder I
haven't fixed yet).

We need some way to get these runtime bounds into the reduction init and
combiner regions. To keep things simple for now I opted to always box
the array arguments so the box can be passed as one argument and the
lower bounds and extents read from the box. This has the disadvantage of
resulting in fir.box_dim operations inside of the critical section. If
these prove to be a performance issue, we could follow OpenACC by reading
the box lower bounds and extents before the reduction and passing them as
block arguments to the reduction init and combiner regions. I would
prefer to keep things simple for now.

Note: this implementation only works when the HLFIR lowering is used. I
don't think it is worth supporting FIR-only lowering because the plan is
for that to be removed soon.

OpenMP array reductions 6/6
Previous PR: #84957
  • Loading branch information
tblah committed Mar 20, 2024
1 parent 22f2056 commit 197f3ec
Show file tree
Hide file tree
Showing 9 changed files with 586 additions and 54 deletions.
5 changes: 5 additions & 0 deletions flang/include/flang/Optimizer/Builder/HLFIRTools.h
Original file line number Diff line number Diff line change
Expand Up @@ -434,6 +434,11 @@ std::pair<hlfir::Entity, mlir::Value>
createTempFromMold(mlir::Location loc, fir::FirOpBuilder &builder,
hlfir::Entity mold);

/// Create a temporary using \p mold as the template for its type, shape and
/// length parameters, declare it with hlfir.declare, and return the declared
/// entity. Unlike createTempFromMold above, only the entity is returned (no
/// accompanying mlir::Value).
/// NOTE(review): the name and call sites suggest the storage is always
/// stack-allocated; this relies on FirOpBuilder::createTemporary -- confirm.
// TODO: this does not support polymorphic molds
hlfir::Entity createStackTempFromMold(mlir::Location loc,
fir::FirOpBuilder &builder,
hlfir::Entity mold);

hlfir::EntityWithAttributes convertCharacterKind(mlir::Location loc,
fir::FirOpBuilder &builder,
hlfir::Entity scalarChar,
Expand Down
250 changes: 212 additions & 38 deletions flang/lib/Lower/OpenMP/ReductionProcessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "ReductionProcessor.h"

#include "flang/Lower/AbstractConverter.h"
#include "flang/Optimizer/Builder/HLFIRTools.h"
#include "flang/Optimizer/Builder/Todo.h"
#include "flang/Optimizer/Dialect/FIRType.h"
#include "flang/Optimizer/HLFIR/HLFIROps.h"
Expand Down Expand Up @@ -90,10 +91,42 @@ std::string ReductionProcessor::getReductionName(llvm::StringRef name,
if (isByRef)
byrefAddition = "_byref";

return (llvm::Twine(name) +
(ty.isIntOrIndex() ? llvm::Twine("_i_") : llvm::Twine("_f_")) +
llvm::Twine(ty.getIntOrFloatBitWidth()) + byrefAddition)
.str();
if (fir::isa_trivial(ty))
return (llvm::Twine(name) +
(ty.isIntOrIndex() ? llvm::Twine("_i_") : llvm::Twine("_f_")) +
llvm::Twine(ty.getIntOrFloatBitWidth()) + byrefAddition)
.str();

// creates a name like reduction_i_64_box_ux4x3
if (auto boxTy = mlir::dyn_cast_or_null<fir::BaseBoxType>(ty)) {
// TODO: support for allocatable boxes:
// !fir.box<!fir.heap<!fir.array<...>>>
fir::SequenceType seqTy = fir::unwrapRefType(boxTy.getEleTy())
.dyn_cast_or_null<fir::SequenceType>();
if (!seqTy)
return {};

std::string prefix = getReductionName(
name, fir::unwrapSeqOrBoxedSeqType(ty), /*isByRef=*/false);
if (prefix.empty())
return {};
std::stringstream tyStr;
tyStr << prefix << "_box_";
bool first = true;
for (std::int64_t extent : seqTy.getShape()) {
if (first)
first = false;
else
tyStr << "x";
if (extent == seqTy.getUnknownExtent())
tyStr << 'u'; // I'm not sure that '?' is safe in symbol names
else
tyStr << extent;
}
return (tyStr.str() + byrefAddition).str();
}

return {};
}

std::string ReductionProcessor::getReductionName(
Expand Down Expand Up @@ -281,13 +314,158 @@ mlir::Value ReductionProcessor::createScalarCombiner(
return reductionOp;
}

/// Emit the body of a reduction combiner region whose operands are boxed
/// arrays.
///
/// \p lhs and \p rhs are loaded to obtain the boxes, the scalar combiner for
/// \p redId is applied to every pair of elements, each result is stored back
/// into the corresponding \p lhs element, and finally the original \p lhs
/// value is yielded from the region.
///
/// Boxes whose element type is not an array with known shape are rejected
/// with a TODO diagnostic.
static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc,
                           ReductionProcessor::ReductionIdentifier redId,
                           fir::BaseBoxType boxTy, mlir::Value lhs,
                           mlir::Value rhs) {
  // TODO: support allocatable arrays: !fir.box<!fir.heap<!fir.array<...>>>
  auto arrayTy = mlir::dyn_cast_or_null<fir::SequenceType>(boxTy.getEleTy());
  if (!arrayTy || arrayTy.hasUnknownShape())
    TODO(loc, "Unsupported boxed type in OpenMP reduction");

  // The region arguments are fir.ref<fir.box<...>>: load both boxes. The
  // incoming `lhs` is left untouched because it is what the region yields.
  mlir::Value lhsBox = builder.create<fir::LoadOp>(loc, lhs);
  mlir::Value rhsBox = builder.create<fir::LoadOp>(loc, rhs);

  const unsigned numDims = arrayTy.getDimension();
  mlir::Type indexTy = builder.getIndexType();
  llvm::SmallVector<mlir::Value> dimExtents;
  llvm::SmallVector<mlir::Value> shapeShiftOperands;
  dimExtents.reserve(numDims);
  shapeShiftOperands.reserve(2 * numDims);

  // Read the lower bound and extent of every dimension from the lhs box.
  // TODO: ideally these box reads would be hoisted out of the critical
  // section, e.g. by passing the box dimensions as block arguments like
  // OpenACC does.
  for (unsigned dimIdx = 0; dimIdx != numDims; ++dimIdx) {
    mlir::Value dimVal = builder.createIntegerConstant(loc, indexTy, dimIdx);
    auto boxDims = builder.create<fir::BoxDimsOp>(loc, indexTy, indexTy,
                                                  indexTy, lhsBox, dimVal);
    mlir::Value lowerBound = boxDims.getLowerBound();
    mlir::Value extent = boxDims.getExtent();
    dimExtents.push_back(extent);
    shapeShiftOperands.push_back(lowerBound);
    shapeShiftOperands.push_back(extent);
  }

  auto shapeShiftTy = fir::ShapeShiftType::get(builder.getContext(), numDims);
  auto shapeShift = builder.create<fir::ShapeShiftOp>(loc, shapeShiftTy,
                                                      shapeShiftOperands);

  // Walk the array elements with an explicit loop nest. An hlfir::elemental
  // would get inlined with a temporary, and since this function emits all of
  // the code in this region nothing is lost by skipping elemental inlining.
  hlfir::LoopNest loops =
      hlfir::genLoopNest(loc, builder, dimExtents, /*isUnordered=*/true);
  builder.setInsertionPointToStart(loops.innerLoop.getBody());

  // Address of the current element of `box`, using the bounds read above.
  mlir::Type eleRefTy = fir::ReferenceType::get(arrayTy.getEleTy());
  auto elementAddr = [&](mlir::Value box) {
    return builder.create<fir::ArrayCoorOp>(
        loc, eleRefTy, box, shapeShift, /*slice=*/mlir::Value{},
        loops.oneBasedIndices, /*typeparms=*/mlir::ValueRange{});
  };
  auto lhsEleAddr = elementAddr(lhsBox);
  auto rhsEleAddr = elementAddr(rhsBox);

  // Combine the two elements and write the result back into the lhs array.
  auto lhsElement = builder.create<fir::LoadOp>(loc, lhsEleAddr);
  auto rhsElement = builder.create<fir::LoadOp>(loc, rhsEleAddr);
  mlir::Value combined = ReductionProcessor::createScalarCombiner(
      builder, loc, redId, eleRefTy, lhsElement, rhsElement);
  builder.create<fir::StoreOp>(loc, combined, lhsEleAddr);

  // Leave the loop nest and yield the (unloaded) lhs reference.
  builder.setInsertionPointAfter(loops.outerLoop);
  builder.create<mlir::omp::YieldOp>(loc, lhs);
}

/// Emit the combiner region body for a reduction over values of type \p ty.
///
/// After stripping any reference wrapper from \p ty:
///  - trivial (scalar) types are combined with the scalar combiner; the result
///    is stored to and yielded through \p lhs when \p isByRef is set, and
///    yielded directly otherwise;
///  - boxed types are delegated to genBoxCombiner;
///  - anything else is reported with a TODO diagnostic.
static void genCombiner(fir::FirOpBuilder &builder, mlir::Location loc,
                        ReductionProcessor::ReductionIdentifier redId,
                        mlir::Type ty, mlir::Value lhs, mlir::Value rhs,
                        bool isByRef) {
  mlir::Type valueTy = fir::unwrapRefType(ty);

  if (!fir::isa_trivial(valueTy)) {
    // all arrays should have been boxed
    if (auto boxTy = mlir::dyn_cast<fir::BaseBoxType>(valueTy))
      genBoxCombiner(builder, loc, redId, boxTy, lhs, rhs);
    else
      TODO(loc, "OpenMP genCombiner for unsupported reduction variable type");
    return;
  }

  // Scalar case: operate on the loaded values.
  mlir::Value lhsVal = builder.loadIfRef(loc, lhs);
  mlir::Value rhsVal = builder.loadIfRef(loc, rhs);
  mlir::Value combined = ReductionProcessor::createScalarCombiner(
      builder, loc, redId, valueTy, lhsVal, rhsVal);

  if (!isByRef) {
    builder.create<mlir::omp::YieldOp>(loc, combined);
    return;
  }

  // By reference: update lhs in place and yield its address.
  builder.create<fir::StoreOp>(loc, combined, lhs);
  builder.create<mlir::omp::YieldOp>(loc, lhs);
}

/// Emit the contents of a reduction initializer region and return the value
/// the caller should yield from it.
///
/// \param builder  builder positioned inside the initializer block.
/// \param loc      location attached to every generated operation.
/// \param redId    reduction kind, used to pick the initial value.
/// \param type     type of the reduction variable (possibly a reference).
/// \param isByRef  whether the reduction is passed by reference.
/// \return for trivial types, the initial value itself (by value) or an
///         alloca holding it (by reference); for boxed arrays, an alloca
///         holding a box over a freshly created temporary that has been
///         assigned the initial value.
///
/// Unsupported types are reported with a TODO diagnostic.
static mlir::Value
createReductionInitRegion(fir::FirOpBuilder &builder, mlir::Location loc,
                          const ReductionProcessor::ReductionIdentifier redId,
                          mlir::Type type, bool isByRef) {
  mlir::Type ty = fir::unwrapRefType(type);
  mlir::Value initValue = ReductionProcessor::getReductionInitValue(
      loc, fir::unwrapSeqOrBoxedSeqType(ty), redId, builder);

  if (fir::isa_trivial(ty)) {
    if (isByRef) {
      mlir::Value alloca = builder.create<fir::AllocaOp>(loc, ty);
      builder.createStoreWithConvert(loc, initValue, alloca);
      return alloca;
    }
    // by val
    return initValue;
  }

  // all arrays are boxed
  if (auto boxTy = mlir::dyn_cast_or_null<fir::BaseBoxType>(ty)) {
    assert(isByRef && "passing arrays by value is unsupported");
    // TODO: support allocatable arrays: !fir.box<!fir.heap<!fir.array<...>>>
    mlir::Type innerTy = fir::extractSequenceType(boxTy);
    // Check for a null type before calling isa<>, which must not be used on a
    // null type: a box that does not wrap an array should reach the TODO
    // below rather than fail inside the cast machinery.
    // NOTE(review): assumes extractSequenceType yields a null type when no
    // sequence type is found -- confirm against FIRType.
    if (!innerTy || !mlir::isa<fir::SequenceType>(innerTy))
      TODO(loc, "Unsupported boxed type for reduction");
    // Create the private copy from the initial fir.box, which is the first
    // argument of the block currently being populated:
    hlfir::Entity source = hlfir::Entity{builder.getBlock()->getArgument(0)};

    // TODO: if the whole reduction is nested inside of a loop, this alloca
    // could lead to a stack overflow (the memory is only freed at the end of
    // the stack frame). The reduction declare operation needs a deallocation
    // region to undo the init region.
    hlfir::Entity temp = createStackTempFromMold(loc, builder, source);

    // Put the temporary inside of a box:
    hlfir::Entity box = hlfir::genVariableBox(loc, builder, temp);
    builder.create<hlfir::AssignOp>(loc, initValue, box);
    // The box itself is passed by reference, so store it into an alloca.
    mlir::Value boxAlloca = builder.create<fir::AllocaOp>(loc, ty);
    builder.create<fir::StoreOp>(loc, box, boxAlloca);
    return boxAlloca;
  }

  TODO(loc, "createReductionInitRegion for unsupported type");
}

mlir::omp::ReductionDeclareOp ReductionProcessor::createReductionDecl(
fir::FirOpBuilder &builder, llvm::StringRef reductionOpName,
const ReductionIdentifier redId, mlir::Type type, mlir::Location loc,
bool isByRef) {
mlir::OpBuilder::InsertionGuard guard(builder);
mlir::ModuleOp module = builder.getModule();

if (reductionOpName.empty())
TODO(loc, "Reduction of some types is not supported");

auto decl =
module.lookupSymbol<mlir::omp::ReductionDeclareOp>(reductionOpName);
if (decl)
Expand All @@ -304,14 +482,9 @@ mlir::omp::ReductionDeclareOp ReductionProcessor::createReductionDecl(
decl.getInitializerRegion().end(), {type}, {loc});
builder.setInsertionPointToEnd(&decl.getInitializerRegion().back());

mlir::Value init = getReductionInitValue(loc, type, redId, builder);
if (isByRef) {
mlir::Value alloca = builder.create<fir::AllocaOp>(loc, valTy);
builder.createStoreWithConvert(loc, init, alloca);
builder.create<mlir::omp::YieldOp>(loc, alloca);
} else {
builder.create<mlir::omp::YieldOp>(loc, init);
}
mlir::Value init =
createReductionInitRegion(builder, loc, redId, type, isByRef);
builder.create<mlir::omp::YieldOp>(loc, init);

builder.createBlock(&decl.getReductionRegion(),
decl.getReductionRegion().end(), {type, type},
Expand All @@ -320,19 +493,7 @@ mlir::omp::ReductionDeclareOp ReductionProcessor::createReductionDecl(
builder.setInsertionPointToEnd(&decl.getReductionRegion().back());
mlir::Value op1 = decl.getReductionRegion().front().getArgument(0);
mlir::Value op2 = decl.getReductionRegion().front().getArgument(1);
mlir::Value outAddr = op1;

op1 = builder.loadIfRef(loc, op1);
op2 = builder.loadIfRef(loc, op2);

mlir::Value reductionOp =
createScalarCombiner(builder, loc, redId, type, op1, op2);
if (isByRef) {
builder.create<fir::StoreOp>(loc, reductionOp, outAddr);
builder.create<mlir::omp::YieldOp>(loc, outAddr);
} else {
builder.create<mlir::omp::YieldOp>(loc, reductionOp);
}
genCombiner(builder, loc, redId, type, op1, op2, isByRef);

return decl;
}
Expand Down Expand Up @@ -387,13 +548,33 @@ void ReductionProcessor::addReductionDecl(

// initial pass to collect all reduction vars so we can figure out if this
// should happen byref
fir::FirOpBuilder &builder = converter.getFirOpBuilder();
for (const Object &object : objectList) {
const Fortran::semantics::Symbol *symbol = object.id();
if (reductionSymbols)
reductionSymbols->push_back(symbol);
mlir::Value symVal = converter.getSymbolAddress(*symbol);
if (auto declOp = symVal.getDefiningOp<hlfir::DeclareOp>())
auto redType = mlir::cast<fir::ReferenceType>(symVal.getType());

// all arrays must be boxed so that we have convenient access to all the
// information needed to iterate over the array
if (mlir::isa<fir::SequenceType>(redType.getEleTy())) {
hlfir::Entity entity{symVal};
entity = genVariableBox(currentLocation, builder, entity);
mlir::Value box = entity.getBase();

// Always pass the box by reference so that the OpenMP dialect
// verifiers don't need to know anything about fir.box
auto alloca =
builder.create<fir::AllocaOp>(currentLocation, box.getType());
builder.create<fir::StoreOp>(currentLocation, box, alloca);

symVal = alloca;
redType = mlir::cast<fir::ReferenceType>(symVal.getType());
} else if (auto declOp = symVal.getDefiningOp<hlfir::DeclareOp>()) {
symVal = declOp.getBase();
}

reductionVars.push_back(symVal);
}
const bool isByRef = doReductionByRef(reductionVars);
Expand All @@ -418,24 +599,17 @@ void ReductionProcessor::addReductionDecl(
break;
}

for (const Object &object : objectList) {
const Fortran::semantics::Symbol *symbol = object.id();
mlir::Value symVal = converter.getSymbolAddress(*symbol);
if (auto declOp = symVal.getDefiningOp<hlfir::DeclareOp>())
symVal = declOp.getBase();
auto redType = symVal.getType().cast<fir::ReferenceType>();
for (mlir::Value symVal : reductionVars) {
auto redType = mlir::cast<fir::ReferenceType>(symVal.getType());
if (redType.getEleTy().isa<fir::LogicalType>())
decl = createReductionDecl(
firOpBuilder,
getReductionName(intrinsicOp, firOpBuilder.getI1Type(), isByRef),
redId, redType, currentLocation, isByRef);
else if (redType.getEleTy().isIntOrIndexOrFloat()) {
else
decl = createReductionDecl(
firOpBuilder, getReductionName(intrinsicOp, redType, isByRef),
redId, redType, currentLocation, isByRef);
} else {
TODO(currentLocation, "Reduction of some types is not supported");
}
reductionDeclSymbols.push_back(mlir::SymbolRefAttr::get(
firOpBuilder.getContext(), decl.getSymName()));
}
Expand All @@ -452,8 +626,8 @@ void ReductionProcessor::addReductionDecl(
if (auto declOp = symVal.getDefiningOp<hlfir::DeclareOp>())
symVal = declOp.getBase();
auto redType = symVal.getType().cast<fir::ReferenceType>();
assert(redType.getEleTy().isIntOrIndexOrFloat() &&
"Unsupported reduction type");
if (!redType.getEleTy().isIntOrIndexOrFloat())
TODO(currentLocation, "User Defined Reduction on non-trivial type");
decl = createReductionDecl(
firOpBuilder,
getReductionName(getRealName(*reductionIntrinsic).ToString(),
Expand Down
2 changes: 1 addition & 1 deletion flang/lib/Lower/OpenMP/ReductionProcessor.h
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ class ReductionProcessor {
/// Creates an OpenMP reduction declaration and inserts it into the provided
/// symbol table. The declaration has a constant initializer with the neutral
/// value `initValue`, and the reduction combiner carried over from `reduce`.
/// TODO: Generalize this for non-integer types, add atomic region.
/// TODO: add atomic region.
static mlir::omp::ReductionDeclareOp
createReductionDecl(fir::FirOpBuilder &builder,
llvm::StringRef reductionOpName,
Expand Down
29 changes: 29 additions & 0 deletions flang/lib/Optimizer/Builder/HLFIRTools.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1111,6 +1111,35 @@ hlfir::createTempFromMold(mlir::Location loc, fir::FirOpBuilder &builder,
return {hlfir::Entity{declareOp.getBase()}, isHeapAlloc};
}

/// Create a temporary modelled on \p mold and return it as a declared entity.
///
/// The length parameters of \p mold are generated first. For an array mold the
/// shape and index extents are generated as well and used to size the
/// temporary; otherwise the temporary is created from the mold's Fortran
/// element type. The storage is then wrapped in an hlfir.declare named ".tmp"
/// and the base of that declare is returned.
///
/// Polymorphic molds are not supported and are reported with a TODO.
hlfir::Entity hlfir::createStackTempFromMold(mlir::Location loc,
                                             fir::FirOpBuilder &builder,
                                             hlfir::Entity mold) {
  llvm::SmallVector<mlir::Value> lenParams;
  hlfir::genLengthParameters(loc, builder, mold, lenParams);

  const llvm::StringRef tmpName{".tmp"};
  mlir::Value shape;   // stays null for non-array molds
  mlir::Value storage; // address of the created temporary

  if (mold.isPolymorphic()) {
    // genAllocatableApplyMold does heap allocation
    TODO(loc, "createStackTempFromMold for polymorphic type");
  } else if (!mold.isArray()) {
    storage = builder.createTemporary(loc, mold.getFortranElementType(),
                                      tmpName, /*shape=*/std::nullopt,
                                      lenParams);
  } else {
    mlir::Type seqTy = hlfir::getFortranElementOrSequenceType(mold.getType());
    shape = hlfir::genShape(loc, builder, mold);
    auto extents = hlfir::getIndexExtents(loc, builder, shape);
    storage = builder.createTemporary(loc, seqTy, tmpName, extents, lenParams);
  }

  // No Fortran variable attributes are attached to the temporary.
  fir::FortranVariableFlagsAttr noAttrs;
  auto declareOp = builder.create<hlfir::DeclareOp>(loc, storage, tmpName,
                                                    shape, lenParams, noAttrs);
  return hlfir::Entity{declareOp.getBase()};
}

hlfir::EntityWithAttributes
hlfir::convertCharacterKind(mlir::Location loc, fir::FirOpBuilder &builder,
hlfir::Entity scalarChar, int toKind) {
Expand Down
15 changes: 0 additions & 15 deletions flang/test/Lower/OpenMP/Todo/reduction-arrays.f90

This file was deleted.

Loading

0 comments on commit 197f3ec

Please sign in to comment.