Skip to content

Commit

Permalink
[OpenMP] Introduce a new worksharing RTL function for distribute
Browse files Browse the repository at this point in the history
This patch adds a new RTL function for worksharing. Currently we use
`__kmpc_for_static_init` for both the `distribute` and `parallel`
portions of the loop clause. This patch replaces the call for the `distribute`
portion with a new runtime call, `__kmpc_distribute_static_init`. For now it
is used in exactly the same way, but it will make it easier in the future
to fine-tune the distribute and parallel portions of the loop independently.

Reviewed By: jdoerfert

Differential Revision: https://reviews.llvm.org/D110429
  • Loading branch information
jhuber6 committed Sep 27, 2021
1 parent be2a421 commit b4a5543
Show file tree
Hide file tree
Showing 10 changed files with 618 additions and 564 deletions.
43 changes: 32 additions & 11 deletions clang/lib/CodeGen/CGOpenMPRuntime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1560,13 +1560,22 @@ llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
}

llvm::FunctionCallee
CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned) {
CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
bool IsGPUDistribute) {
assert((IVSize == 32 || IVSize == 64) &&
"IV size is not compatible with the omp runtime");
StringRef Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
: "__kmpc_for_static_init_4u")
: (IVSigned ? "__kmpc_for_static_init_8"
: "__kmpc_for_static_init_8u");
StringRef Name;
if (IsGPUDistribute)
Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
: "__kmpc_distribute_static_init_4u")
: (IVSigned ? "__kmpc_distribute_static_init_8"
: "__kmpc_distribute_static_init_8u");
else
Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
: "__kmpc_for_static_init_4u")
: (IVSigned ? "__kmpc_for_static_init_8"
: "__kmpc_for_static_init_8u");

llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
auto *PtrTy = llvm::PointerType::getUnqual(ITy);
llvm::Type *TypeParams[] = {
Expand Down Expand Up @@ -2826,7 +2835,7 @@ void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
: OMP_IDENT_WORK_SECTIONS);
llvm::Value *ThreadId = getThreadID(CGF, Loc);
llvm::FunctionCallee StaticInitFunction =
createForStaticInitFunction(Values.IVSize, Values.IVSigned);
createForStaticInitFunction(Values.IVSize, Values.IVSigned, false);
auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
Expand All @@ -2841,8 +2850,13 @@ void CGOpenMPRuntime::emitDistributeStaticInit(
llvm::Value *UpdatedLocation =
emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
llvm::Value *ThreadId = getThreadID(CGF, Loc);
llvm::FunctionCallee StaticInitFunction =
createForStaticInitFunction(Values.IVSize, Values.IVSigned);
llvm::FunctionCallee StaticInitFunction;
bool isGPUDistribute =
CGM.getLangOpts().OpenMPIsDevice &&
(CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
StaticInitFunction = createForStaticInitFunction(
Values.IVSize, Values.IVSigned, isGPUDistribute);

emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
OMPC_SCHEDULE_MODIFIER_unknown, Values);
Expand All @@ -2863,9 +2877,16 @@ void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
: OMP_IDENT_WORK_SECTIONS),
getThreadID(CGF, Loc)};
auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_for_static_fini),
Args);
if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
(CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
CGF.EmitRuntimeCall(
OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
Args);
else
CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
CGM.getModule(), OMPRTL___kmpc_for_static_fini),
Args);
}

void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
Expand Down
6 changes: 4 additions & 2 deletions clang/lib/CodeGen/CGOpenMPRuntime.h
Original file line number Diff line number Diff line change
Expand Up @@ -795,9 +795,11 @@ class CGOpenMPRuntime {
llvm::Type *getKmpc_MicroPointerTy();

/// Returns __kmpc_for_static_init_* runtime function for the specified
/// size \a IVSize and sign \a IVSigned.
/// size \a IVSize and sign \a IVSigned. Returns the distribute variant
/// __kmpc_distribute_static_init_* instead if \a IsGPUDistribute is set.
llvm::FunctionCallee createForStaticInitFunction(unsigned IVSize,
bool IVSigned);
bool IVSigned,
bool IsGPUDistribute);

/// Returns __kmpc_dispatch_init_* runtime function for the specified
/// size \a IVSize and sign \a IVSigned.
Expand Down
Loading

0 comments on commit b4a5543

Please sign in to comment.