Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 116 additions & 10 deletions flang/lib/Lower/OpenMP/ClauseProcessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1072,27 +1072,133 @@ bool ClauseProcessor::processEnter(
}

bool ClauseProcessor::processUseDeviceAddr(
Fortran::lower::StatementContext &stmtCtx,
mlir::omp::UseDeviceAddrClauseOps &result,
llvm::SmallVectorImpl<mlir::Type> &useDeviceTypes,
llvm::SmallVectorImpl<mlir::Location> &useDeviceLocs,
llvm::SmallVectorImpl<const semantics::Symbol *> &useDeviceSyms) const {
return findRepeatableClause<omp::clause::UseDeviceAddr>(
[&](const omp::clause::UseDeviceAddr &clause, const parser::CharBlock &) {
addUseDeviceClause(converter, clause.v, result.useDeviceAddrVars,
useDeviceTypes, useDeviceLocs, useDeviceSyms);
llvm::SmallVectorImpl<const Fortran::semantics::Symbol *> &useDeviceSyms)
const {
std::map<const Fortran::semantics::Symbol *,
llvm::SmallVector<OmpMapMemberIndicesData>>
parentMemberIndices;
bool clauseFound = findRepeatableClause<omp::clause::UseDeviceAddr>(
[&](const omp::clause::UseDeviceAddr &clause,
const Fortran::parser::CharBlock &) {
const Fortran::parser::CharBlock source;
mlir::Location location = converter.genLocation(source);
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
llvm::omp::OpenMPOffloadMappingFlags mapTypeBits =
llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO |
llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM;
for (const omp::Object &object : clause.v) {
llvm::SmallVector<mlir::Value> bounds;
std::stringstream asFortran;

Fortran::lower::AddrAndBoundsInfo info =
Fortran::lower::gatherDataOperandAddrAndBounds<
mlir::omp::MapBoundsOp, mlir::omp::MapBoundsType>(
converter, firOpBuilder, semaCtx, stmtCtx, *object.sym(),
object.ref(), location, asFortran, bounds,
treatIndexAsSection);

auto origSymbol = converter.getSymbolAddress(*object.sym());
mlir::Value symAddr = info.addr;
if (origSymbol && fir::isTypeWithDescriptor(origSymbol.getType()))
symAddr = origSymbol;

// Explicit map captures are captured ByRef by default,
// optimisation passes may alter this to ByCopy or other capture
// types to optimise
mlir::omp::MapInfoOp mapOp = createMapInfoOp(
firOpBuilder, location, symAddr,
/*varPtrPtr=*/mlir::Value{}, asFortran.str(), bounds,
/*members=*/{}, /*membersIndex=*/mlir::DenseIntElementsAttr{},
static_cast<
std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
mapTypeBits),
mlir::omp::VariableCaptureKind::ByRef, symAddr.getType());

if (object.sym()->owner().IsDerivedType()) {
addChildIndexAndMapToParent(object, parentMemberIndices, mapOp,
semaCtx);
} else {
useDeviceSyms.push_back(object.sym());
useDeviceTypes.push_back(symAddr.getType());
useDeviceLocs.push_back(symAddr.getLoc());
result.useDeviceAddrVars.push_back(mapOp);
}
}
});

insertChildMapInfoIntoParent(converter, parentMemberIndices,
result.useDeviceAddrVars, useDeviceSyms,
&useDeviceTypes, &useDeviceLocs);
return clauseFound;
}

bool ClauseProcessor::processUseDevicePtr(
Fortran::lower::StatementContext &stmtCtx,
mlir::omp::UseDevicePtrClauseOps &result,
llvm::SmallVectorImpl<mlir::Type> &useDeviceTypes,
llvm::SmallVectorImpl<mlir::Location> &useDeviceLocs,
llvm::SmallVectorImpl<const semantics::Symbol *> &useDeviceSyms) const {
return findRepeatableClause<omp::clause::UseDevicePtr>(
[&](const omp::clause::UseDevicePtr &clause, const parser::CharBlock &) {
addUseDeviceClause(converter, clause.v, result.useDevicePtrVars,
useDeviceTypes, useDeviceLocs, useDeviceSyms);
llvm::SmallVectorImpl<const Fortran::semantics::Symbol *> &useDeviceSyms)
const {
std::map<const Fortran::semantics::Symbol *,
llvm::SmallVector<OmpMapMemberIndicesData>>
parentMemberIndices;
bool clauseFound = findRepeatableClause<omp::clause::UseDevicePtr>(
[&](const omp::clause::UseDevicePtr &clause,
const Fortran::parser::CharBlock &) {
const Fortran::parser::CharBlock source;
mlir::Location location = converter.genLocation(source);
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
llvm::omp::OpenMPOffloadMappingFlags mapTypeBits =
llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_TO |
llvm::omp::OpenMPOffloadMappingFlags::OMP_MAP_FROM;
for (const omp::Object &object : clause.v) {
llvm::SmallVector<mlir::Value> bounds;
std::stringstream asFortran;

Fortran::lower::AddrAndBoundsInfo info =
Fortran::lower::gatherDataOperandAddrAndBounds<
mlir::omp::MapBoundsOp, mlir::omp::MapBoundsType>(
converter, firOpBuilder, semaCtx, stmtCtx, *object.sym(),
object.ref(), location, asFortran, bounds,
treatIndexAsSection);

auto origSymbol = converter.getSymbolAddress(*object.sym());
mlir::Value symAddr = info.addr;
if (origSymbol && fir::isTypeWithDescriptor(origSymbol.getType()))
symAddr = origSymbol;

// Explicit map captures are captured ByRef by default,
// optimisation passes may alter this to ByCopy or other capture
// types to optimise
mlir::omp::MapInfoOp mapOp = createMapInfoOp(
firOpBuilder, location, symAddr,
/*varPtrPtr=*/mlir::Value{}, asFortran.str(), bounds,
/*members=*/{}, /*membersIndex=*/mlir::DenseIntElementsAttr{},
static_cast<
std::underlying_type_t<llvm::omp::OpenMPOffloadMappingFlags>>(
mapTypeBits),
mlir::omp::VariableCaptureKind::ByRef, symAddr.getType());

if (object.sym()->owner().IsDerivedType()) {
addChildIndexAndMapToParent(object, parentMemberIndices, mapOp,
semaCtx);
} else {
useDeviceSyms.push_back(object.sym());
useDeviceTypes.push_back(symAddr.getType());
useDeviceLocs.push_back(symAddr.getLoc());
result.useDevicePtrVars.push_back(mapOp);
}
}
});

insertChildMapInfoIntoParent(converter, parentMemberIndices,
result.useDevicePtrVars, useDeviceSyms,
&useDeviceTypes, &useDeviceLocs);
return clauseFound;
}

} // namespace omp
Expand Down
24 changes: 14 additions & 10 deletions flang/lib/Lower/OpenMP/ClauseProcessor.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,16 +128,20 @@ class ClauseProcessor {
llvm::SmallVectorImpl<const semantics::Symbol *> *reductionSyms =
nullptr) const;
bool processTo(llvm::SmallVectorImpl<DeclareTargetCapturePair> &result) const;
bool processUseDeviceAddr(
mlir::omp::UseDeviceAddrClauseOps &result,
llvm::SmallVectorImpl<mlir::Type> &useDeviceTypes,
llvm::SmallVectorImpl<mlir::Location> &useDeviceLocs,
llvm::SmallVectorImpl<const semantics::Symbol *> &useDeviceSyms) const;
bool processUseDevicePtr(
mlir::omp::UseDevicePtrClauseOps &result,
llvm::SmallVectorImpl<mlir::Type> &useDeviceTypes,
llvm::SmallVectorImpl<mlir::Location> &useDeviceLocs,
llvm::SmallVectorImpl<const semantics::Symbol *> &useDeviceSyms) const;
bool
processUseDeviceAddr(Fortran::lower::StatementContext &stmtCtx,
mlir::omp::UseDeviceAddrClauseOps &result,
llvm::SmallVectorImpl<mlir::Type> &useDeviceTypes,
llvm::SmallVectorImpl<mlir::Location> &useDeviceLocs,
llvm::SmallVectorImpl<const Fortran::semantics::Symbol *>
&useDeviceSyms) const;
bool
processUseDevicePtr(Fortran::lower::StatementContext &stmtCtx,
mlir::omp::UseDevicePtrClauseOps &result,
llvm::SmallVectorImpl<mlir::Type> &useDeviceTypes,
llvm::SmallVectorImpl<mlir::Location> &useDeviceLocs,
llvm::SmallVectorImpl<const Fortran::semantics::Symbol *>
&useDeviceSyms) const;

template <typename T>
bool processMotionClauses(lower::StatementContext &stmtCtx,
Expand Down
87 changes: 64 additions & 23 deletions flang/lib/Lower/OpenMP/OpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -702,32 +702,73 @@ static void genBodyOfTargetDataOp(
fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder();
mlir::Region &region = dataOp.getRegion();

firOpBuilder.createBlock(&region, {}, useDeviceTypes, useDeviceLocs);
auto *regionBlock =
firOpBuilder.createBlock(&region, {}, useDeviceTypes, useDeviceLocs);

// Clones the `bounds` placing them inside the target region and returns them.
auto cloneBound = [&](mlir::Value bound) {
if (mlir::isMemoryEffectFree(bound.getDefiningOp())) {
mlir::Operation *clonedOp = bound.getDefiningOp()->clone();
regionBlock->push_back(clonedOp);
return clonedOp->getResult(0);
}
TODO(converter.getCurrentLocation(),
"target map clause operand unsupported bound type");
};

auto cloneBounds = [cloneBound](llvm::ArrayRef<mlir::Value> bounds) {
llvm::SmallVector<mlir::Value> clonedBounds;
for (mlir::Value bound : bounds)
clonedBounds.emplace_back(cloneBound(bound));
return clonedBounds;
};

for (auto [argIndex, argSymbol] : llvm::enumerate(useDeviceSymbols)) {
const mlir::BlockArgument &arg = region.front().getArgument(argIndex);
fir::ExtendedValue extVal = converter.getSymbolExtendedValue(*argSymbol);
if (auto refType = mlir::dyn_cast<fir::ReferenceType>(arg.getType())) {
if (fir::isa_builtin_cptr_type(refType.getElementType())) {
converter.bindSymbol(*argSymbol, arg);
} else {
// Avoid capture of a reference to a structured binding.
const semantics::Symbol *sym = argSymbol;
extVal.match(
[&](const fir::MutableBoxValue &mbv) {
converter.bindSymbol(
*sym,
fir::MutableBoxValue(
arg, fir::factory::getNonDeferredLenParams(extVal), {}));
},
[&](const auto &) {
TODO(converter.getCurrentLocation(),
"use_device clause operand unsupported type");
});
}
auto refType = mlir::dyn_cast<fir::ReferenceType>(arg.getType());
if (refType && fir::isa_builtin_cptr_type(refType.getElementType())) {
converter.bindSymbol(*argSymbol, arg);
} else {
TODO(converter.getCurrentLocation(),
"use_device clause operand unsupported type");
// Avoid capture of a reference to a structured binding.
const Fortran::semantics::Symbol *sym = argSymbol;
// Structure component symbols don't have bindings.
if (sym->owner().IsDerivedType())
continue;
fir::ExtendedValue extVal = converter.getSymbolExtendedValue(*sym);
extVal.match(
[&](const fir::BoxValue &v) {
converter.bindSymbol(*sym,
fir::BoxValue(arg, cloneBounds(v.getLBounds()),
v.getExplicitParameters(),
v.getExplicitExtents()));
},
[&](const fir::MutableBoxValue &v) {
converter.bindSymbol(
*sym, fir::MutableBoxValue(arg, cloneBounds(v.getLBounds()),
v.getMutableProperties()));
},
[&](const fir::ArrayBoxValue &v) {
converter.bindSymbol(
*sym, fir::ArrayBoxValue(arg, cloneBounds(v.getExtents()),
cloneBounds(v.getLBounds()),
v.getSourceBox()));
},
[&](const fir::CharArrayBoxValue &v) {
converter.bindSymbol(
*sym, fir::CharArrayBoxValue(arg, cloneBound(v.getLen()),
cloneBounds(v.getExtents()),
cloneBounds(v.getLBounds())));
},
[&](const fir::CharBoxValue &v) {
converter.bindSymbol(
*sym, fir::CharBoxValue(arg, cloneBound(v.getLen())));
},
[&](const fir::UnboxedValue &v) { converter.bindSymbol(*sym, arg); },
[&](const auto &) {
TODO(converter.getCurrentLocation(),
"target map clause operand unsupported type");
});
}
}

Expand Down Expand Up @@ -1191,9 +1232,9 @@ static void genTargetDataClauses(
cp.processDevice(stmtCtx, clauseOps);
cp.processIf(llvm::omp::Directive::OMPD_target_data, clauseOps);
cp.processMap(loc, stmtCtx, clauseOps);
cp.processUseDeviceAddr(clauseOps, useDeviceTypes, useDeviceLocs,
cp.processUseDeviceAddr(stmtCtx, clauseOps, useDeviceTypes, useDeviceLocs,
useDeviceSyms);
cp.processUseDevicePtr(clauseOps, useDeviceTypes, useDeviceLocs,
cp.processUseDevicePtr(stmtCtx, clauseOps, useDeviceTypes, useDeviceLocs,
useDeviceSyms);

// This function implements the deprecated functionality of use_device_ptr
Expand Down
37 changes: 26 additions & 11 deletions flang/lib/Optimizer/Transforms/OMPMapInfoFinalization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,27 +106,42 @@ class OMPMapInfoFinalizationPass
// TODO: map the addendum segment of the descriptor, similarly to the
// above base address/data pointer member.

if (auto mapClauseOwner =
llvm::dyn_cast<mlir::omp::MapClauseOwningOpInterface>(target)) {
auto addOperands = [&](mlir::OperandRange &operandsArr,
mlir::MutableOperandRange &mutableOpRange,
auto directiveOp) {
llvm::SmallVector<mlir::Value> newMapOps;
mlir::OperandRange mapVarsArr = mapClauseOwner.getMapVars();

for (size_t i = 0; i < mapVarsArr.size(); ++i) {
if (mapVarsArr[i] == op) {
for (size_t i = 0; i < operandsArr.size(); ++i) {
if (operandsArr[i] == op) {
// Push new implicit maps generated for the descriptor.
newMapOps.push_back(baseAddr);

// for TargetOp's which have IsolatedFromAbove we must align the
// new additional map operand with an appropriate BlockArgument,
// as the printing and later processing currently requires a 1:1
// mapping of BlockArgs to MapInfoOp's at the same placement in
// each array (BlockArgs and MapVars).
if (auto targetOp = llvm::dyn_cast<mlir::omp::TargetOp>(target))
targetOp.getRegion().insertArgument(i, baseAddr.getType(), loc);
// each array (BlockArgs and MapOperands).
if (directiveOp) {
directiveOp.getRegion().insertArgument(i, baseAddr.getType(), loc);
}
}
newMapOps.push_back(mapVarsArr[i]);
newMapOps.push_back(operandsArr[i]);
}
mapClauseOwner.getMapVarsMutable().assign(newMapOps);
mutableOpRange.assign(newMapOps);
};
if (auto mapClauseOwner =
llvm::dyn_cast<mlir::omp::MapClauseOwningOpInterface>(target)) {
mlir::OperandRange mapOperandsArr = mapClauseOwner.getMapVars();
mlir::MutableOperandRange mapMutableOpRange =
mapClauseOwner.getMapVarsMutable();
mlir::omp::TargetOp targetOp =
llvm::dyn_cast<mlir::omp::TargetOp>(target);
addOperands(mapOperandsArr, mapMutableOpRange, targetOp);
}
if (auto targetDataOp = llvm::dyn_cast<mlir::omp::TargetDataOp>(target)) {
mlir::OperandRange useDevAddrArr = targetDataOp.getUseDeviceAddrVars();
mlir::MutableOperandRange useDevAddrMutableOpRange =
targetDataOp.getUseDeviceAddrVarsMutable();
addOperands(useDevAddrArr, useDevAddrMutableOpRange, targetDataOp);
}

mlir::Value newDescParentMapOp = builder.create<mlir::omp::MapInfoOp>(
Expand Down
12 changes: 7 additions & 5 deletions flang/test/Lower/OpenMP/target.f90
Original file line number Diff line number Diff line change
Expand Up @@ -527,21 +527,23 @@ end subroutine omp_target_device_ptr
!===============================================================================

!CHECK-LABEL: func.func @_QPomp_target_device_addr() {
subroutine omp_target_device_addr
subroutine omp_target_device_addr
integer, pointer :: a
!CHECK: %[[VAL_0:.*]] = fir.alloca !fir.box<!fir.ptr<i32>> {bindc_name = "a", uniq_name = "_QFomp_target_device_addrEa"}
!CHECK: %[[VAL_0_DECL:.*]]:2 = hlfir.declare %0 {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFomp_target_device_addrEa"} : (!fir.ref<!fir.box<!fir.ptr<i32>>>) -> (!fir.ref<!fir.box<!fir.ptr<i32>>>, !fir.ref<!fir.box<!fir.ptr<i32>>>)
!CHECK: %[[MAP_MEMBERS:.*]] = omp.map.info var_ptr({{.*}} : !fir.ref<!fir.box<!fir.ptr<i32>>>, i32) var_ptr_ptr({{.*}} : !fir.llvm_ptr<!fir.ref<i32>>) map_clauses(tofrom) capture(ByRef) -> !fir.llvm_ptr<!fir.ref<i32>> {name = ""}
!CHECK: %[[MAP:.*]] = omp.map.info var_ptr({{.*}} : !fir.ref<!fir.box<!fir.ptr<i32>>>, !fir.box<!fir.ptr<i32>>) map_clauses(tofrom) capture(ByRef) members(%[[MAP_MEMBERS]] : [0] : !fir.llvm_ptr<!fir.ref<i32>>) -> !fir.ref<!fir.box<!fir.ptr<i32>>> {name = "a"}
!CHECK: omp.target_data use_device_addr(%[[VAL_0_DECL]]#1 : !fir.ref<!fir.box<!fir.ptr<i32>>>) map_entries(%[[MAP_MEMBERS]], %[[MAP]] : {{.*}}) {
!CHECK: %[[DEV_ADDR_MEMBERS:.*]] = omp.map.info var_ptr({{.*}} : !fir.ref<!fir.box<!fir.ptr<i32>>>, i32) var_ptr_ptr({{.*}} : !fir.llvm_ptr<!fir.ref<i32>>) map_clauses(tofrom) capture(ByRef) -> !fir.llvm_ptr<!fir.ref<i32>> {name = ""}
!CHECK: %[[DEV_ADDR:.*]] = omp.map.info var_ptr({{.*}} : !fir.ref<!fir.box<!fir.ptr<i32>>>, !fir.box<!fir.ptr<i32>>) map_clauses(tofrom) capture(ByRef) members(%[[DEV_ADDR_MEMBERS]] : [0] : !fir.llvm_ptr<!fir.ref<i32>>) -> !fir.ref<!fir.box<!fir.ptr<i32>>> {name = "a"}
!CHECK: omp.target_data use_device_addr(%[[DEV_ADDR_MEMBERS]], %[[DEV_ADDR]] : {{.*}}) map_entries(%[[MAP_MEMBERS]], %[[MAP]] : {{.*}}) {
!$omp target data map(tofrom: a) use_device_addr(a)
!CHECK: ^bb0(%[[VAL_1:.*]]: !fir.ref<!fir.box<!fir.ptr<i32>>>):
!CHECK: %[[VAL_1_DECL:.*]]:2 = hlfir.declare %[[VAL_1]] {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFomp_target_device_addrEa"} : (!fir.ref<!fir.box<!fir.ptr<i32>>>) -> (!fir.ref<!fir.box<!fir.ptr<i32>>>, !fir.ref<!fir.box<!fir.ptr<i32>>>)
!CHECK: ^bb0(%[[ARG_0:.*]]: !fir.llvm_ptr<!fir.ref<i32>>, %[[ARG_1:.*]]: !fir.ref<!fir.box<!fir.ptr<i32>>>):
!CHECK: %[[VAL_1_DECL:.*]]:2 = hlfir.declare %[[ARG_1]] {fortran_attrs = #fir.var_attrs<pointer>, uniq_name = "_QFomp_target_device_addrEa"} : (!fir.ref<!fir.box<!fir.ptr<i32>>>) -> (!fir.ref<!fir.box<!fir.ptr<i32>>>, !fir.ref<!fir.box<!fir.ptr<i32>>>)
!CHECK: %[[C10:.*]] = arith.constant 10 : i32
!CHECK: %[[A_BOX:.*]] = fir.load %[[VAL_1_DECL]]#0 : !fir.ref<!fir.box<!fir.ptr<i32>>>
!CHECK: %[[A_ADDR:.*]] = fir.box_addr %[[A_BOX]] : (!fir.box<!fir.ptr<i32>>) -> !fir.ptr<i32>
!CHECK: hlfir.assign %[[C10]] to %[[A_ADDR]] : i32, !fir.ptr<i32>
a = 10
a = 10
!CHECK: omp.terminator
!$omp end target data
!CHECK: }
Expand Down
Loading