Skip to content

Commit

Permalink
[libomptarget-nvptx] loop: Determine if runtime uninitialized
Browse files Browse the repository at this point in the history
The generic entry points for static loop scheduling previously
hardcoded the assumption that the runtime was initialized. This can
be wrong if the compiler determines that the runtime is not needed
and calls the init functions accordingly.

This didn't affect clang-ykt because they have entry points for
different combinations of SPMD x Runtime not needed. I haven't done
measurements yet, but with inlining we might get away with always
calling the generic interface and letting the compiler and runtime
figure out the rest.
In any case, a correct runtime is always better than having
functions that may only be called if previous calls passed in
a specific set of arguments!

Differential Revision: https://reviews.llvm.org/D47131

llvm-svn: 333285
  • Loading branch information
hahnjo committed May 25, 2018
1 parent 65e0b87 commit 17aabf8
Showing 1 changed file with 42 additions and 38 deletions.
80 changes: 42 additions & 38 deletions openmp/libomptarget/deviceRTLs/nvptx/src/loop.cu
Expand Up @@ -96,32 +96,32 @@ public:
INLINE static void for_static_init(int32_t schedtype, int32_t *plastiter,
T *plower, T *pupper, ST *pstride,
ST chunk, bool IsSPMDExecutionMode,
bool IsOMPRuntimeUnavailable = false) {
// When IsOMPRuntimeUnavailable is true, we assume that the caller is
bool IsRuntimeUninitialized) {
// When IsRuntimeUninitialized is true, we assume that the caller is
// in an L0 parallel region and that all worker threads participate.

int tid = GetLogicalThreadIdInBlock();

// Assume we are in teams region or that we use a single block
// per target region
ST numberOfActiveOMPThreads = GetNumberOfOmpThreads(
tid, IsSPMDExecutionMode, IsOMPRuntimeUnavailable);
tid, IsSPMDExecutionMode, IsRuntimeUninitialized);

// All warps that are in excess of the maximum requested, do
// not execute the loop
PRINT(LD_LOOP,
"OMP Thread %d: schedule type %d, chunk size = %lld, mytid "
"%d, num tids %d\n",
GetOmpThreadId(tid, IsSPMDExecutionMode, IsOMPRuntimeUnavailable),
GetOmpThreadId(tid, IsSPMDExecutionMode, IsRuntimeUninitialized),
schedtype, P64(chunk),
GetOmpThreadId(tid, IsSPMDExecutionMode, IsOMPRuntimeUnavailable),
GetOmpThreadId(tid, IsSPMDExecutionMode, IsRuntimeUninitialized),
GetNumberOfOmpThreads(tid, IsSPMDExecutionMode,
IsOMPRuntimeUnavailable));
IsRuntimeUninitialized));
ASSERT0(
LT_FUSSY,
(GetOmpThreadId(tid, IsSPMDExecutionMode, IsOMPRuntimeUnavailable)) <
(GetOmpThreadId(tid, IsSPMDExecutionMode, IsRuntimeUninitialized)) <
(GetNumberOfOmpThreads(tid, IsSPMDExecutionMode,
IsOMPRuntimeUnavailable)),
IsRuntimeUninitialized)),
"current thread is not needed here; error");

// copy
Expand All @@ -135,19 +135,19 @@ public:
case kmp_sched_static_chunk: {
if (chunk > 0) {
entityId =
GetOmpThreadId(tid, IsSPMDExecutionMode, IsOMPRuntimeUnavailable);
GetOmpThreadId(tid, IsSPMDExecutionMode, IsRuntimeUninitialized);
numberOfEntities = GetNumberOfOmpThreads(tid, IsSPMDExecutionMode,
IsOMPRuntimeUnavailable);
IsRuntimeUninitialized);
ForStaticChunk(lastiter, lb, ub, stride, chunk, entityId,
numberOfEntities);
break;
}
} // note: if chunk <=0, use nochunk
case kmp_sched_static_nochunk: {
entityId =
GetOmpThreadId(tid, IsSPMDExecutionMode, IsOMPRuntimeUnavailable);
GetOmpThreadId(tid, IsSPMDExecutionMode, IsRuntimeUninitialized);
numberOfEntities = GetNumberOfOmpThreads(tid, IsSPMDExecutionMode,
IsOMPRuntimeUnavailable);
IsRuntimeUninitialized);
ForStaticNoChunk(lastiter, lb, ub, stride, chunk, entityId,
numberOfEntities);
break;
Expand All @@ -172,12 +172,12 @@ public:
case kmp_sched_distr_static_chunk_sched_static_chunkone: {
entityId =
GetNumberOfOmpThreads(tid, IsSPMDExecutionMode,
IsOMPRuntimeUnavailable) *
IsRuntimeUninitialized) *
GetOmpTeamId() +
GetOmpThreadId(tid, IsSPMDExecutionMode, IsOMPRuntimeUnavailable);
GetOmpThreadId(tid, IsSPMDExecutionMode, IsRuntimeUninitialized);
numberOfEntities = GetNumberOfOmpTeams() *
GetNumberOfOmpThreads(tid, IsSPMDExecutionMode,
IsOMPRuntimeUnavailable);
IsRuntimeUninitialized);
ForStaticChunk(lastiter, lb, ub, stride, chunk, entityId,
numberOfEntities);
break;
Expand All @@ -187,9 +187,9 @@ public:
PRINT(LD_LOOP, "unknown schedtype %d, revert back to static chunk\n",
schedtype);
entityId =
GetOmpThreadId(tid, IsSPMDExecutionMode, IsOMPRuntimeUnavailable);
GetOmpThreadId(tid, IsSPMDExecutionMode, IsRuntimeUninitialized);
numberOfEntities = GetNumberOfOmpThreads(tid, IsSPMDExecutionMode,
IsOMPRuntimeUnavailable);
IsRuntimeUninitialized);
ForStaticChunk(lastiter, lb, ub, stride, chunk, entityId,
numberOfEntities);
}
Expand All @@ -202,7 +202,7 @@ public:
PRINT(LD_LOOP,
"Got sched: Active %d, total %d: lb %lld, ub %lld, stride %lld\n",
GetNumberOfOmpThreads(tid, IsSPMDExecutionMode,
IsOMPRuntimeUnavailable),
IsRuntimeUninitialized),
GetNumberOfWorkersInTeam(), P64(*plower), P64(*pupper),
P64(*pstride));
}
Expand Down Expand Up @@ -574,7 +574,8 @@ EXTERN void __kmpc_for_static_init_4(kmp_Indent *loc, int32_t global_tid,
int32_t chunk) {
PRINT0(LD_IO, "call kmpc_for_static_init_4\n");
omptarget_nvptx_LoopSupport<int32_t, int32_t>::for_static_init(
schedtype, plastiter, plower, pupper, pstride, chunk, isSPMDMode());
schedtype, plastiter, plower, pupper, pstride, chunk, isSPMDMode(),
isRuntimeUninitialized());
}

EXTERN void __kmpc_for_static_init_4u(kmp_Indent *loc, int32_t global_tid,
Expand All @@ -584,7 +585,8 @@ EXTERN void __kmpc_for_static_init_4u(kmp_Indent *loc, int32_t global_tid,
int32_t chunk) {
PRINT0(LD_IO, "call kmpc_for_static_init_4u\n");
omptarget_nvptx_LoopSupport<uint32_t, int32_t>::for_static_init(
schedtype, plastiter, plower, pupper, pstride, chunk, isSPMDMode());
schedtype, plastiter, plower, pupper, pstride, chunk, isSPMDMode(),
isRuntimeUninitialized());
}

EXTERN void __kmpc_for_static_init_8(kmp_Indent *loc, int32_t global_tid,
Expand All @@ -594,7 +596,8 @@ EXTERN void __kmpc_for_static_init_8(kmp_Indent *loc, int32_t global_tid,
int64_t chunk) {
PRINT0(LD_IO, "call kmpc_for_static_init_8\n");
omptarget_nvptx_LoopSupport<int64_t, int64_t>::for_static_init(
schedtype, plastiter, plower, pupper, pstride, chunk, isSPMDMode());
schedtype, plastiter, plower, pupper, pstride, chunk, isSPMDMode(),
isRuntimeUninitialized());
}

EXTERN void __kmpc_for_static_init_8u(kmp_Indent *loc, int32_t global_tid,
Expand All @@ -604,7 +607,8 @@ EXTERN void __kmpc_for_static_init_8u(kmp_Indent *loc, int32_t global_tid,
int64_t chunk) {
PRINT0(LD_IO, "call kmpc_for_static_init_8u\n");
omptarget_nvptx_LoopSupport<uint64_t, int64_t>::for_static_init(
schedtype, plastiter, plower, pupper, pstride, chunk, isSPMDMode());
schedtype, plastiter, plower, pupper, pstride, chunk, isSPMDMode(),
isRuntimeUninitialized());
}

EXTERN
Expand All @@ -616,8 +620,8 @@ void __kmpc_for_static_init_4_simple_spmd(kmp_Indent *loc, int32_t global_tid,
PRINT0(LD_IO, "call kmpc_for_static_init_4_simple_spmd\n");
omptarget_nvptx_LoopSupport<int32_t, int32_t>::for_static_init(
schedtype, plastiter, plower, pupper, pstride, chunk,
/*isSPMDExecutionMode=*/true,
/*IsOMPRuntimeUnavailable=*/true);
/*IsSPMDExecutionMode=*/true,
/*IsRuntimeUninitialized=*/true);
}

EXTERN
Expand All @@ -629,8 +633,8 @@ void __kmpc_for_static_init_4u_simple_spmd(kmp_Indent *loc, int32_t global_tid,
PRINT0(LD_IO, "call kmpc_for_static_init_4u_simple_spmd\n");
omptarget_nvptx_LoopSupport<uint32_t, int32_t>::for_static_init(
schedtype, plastiter, plower, pupper, pstride, chunk,
/*isSPMDExecutionMode=*/true,
/*IsOMPRuntimeUnavailable=*/true);
/*IsSPMDExecutionMode=*/true,
/*IsRuntimeUninitialized=*/true);
}

EXTERN
Expand All @@ -642,8 +646,8 @@ void __kmpc_for_static_init_8_simple_spmd(kmp_Indent *loc, int32_t global_tid,
PRINT0(LD_IO, "call kmpc_for_static_init_8_simple_spmd\n");
omptarget_nvptx_LoopSupport<int64_t, int64_t>::for_static_init(
schedtype, plastiter, plower, pupper, pstride, chunk,
/*isSPMDExecutionMode=*/true,
/*IsOMPRuntimeUnavailable=*/true);
/*IsSPMDExecutionMode=*/true,
/*IsRuntimeUninitialized=*/true);
}

EXTERN
Expand All @@ -655,8 +659,8 @@ void __kmpc_for_static_init_8u_simple_spmd(kmp_Indent *loc, int32_t global_tid,
PRINT0(LD_IO, "call kmpc_for_static_init_8u_simple_spmd\n");
omptarget_nvptx_LoopSupport<uint64_t, int64_t>::for_static_init(
schedtype, plastiter, plower, pupper, pstride, chunk,
/*isSPMDExecutionMode=*/true,
/*IsOMPRuntimeUnavailable=*/true);
/*IsSPMDExecutionMode=*/true,
/*IsRuntimeUninitialized=*/true);
}

EXTERN
Expand All @@ -667,8 +671,8 @@ void __kmpc_for_static_init_4_simple_generic(
PRINT0(LD_IO, "call kmpc_for_static_init_4_simple_generic\n");
omptarget_nvptx_LoopSupport<int32_t, int32_t>::for_static_init(
schedtype, plastiter, plower, pupper, pstride, chunk,
/*isSPMDExecutionMode=*/false,
/*IsOMPRuntimeUnavailable=*/true);
/*IsSPMDExecutionMode=*/false,
/*IsRuntimeUninitialized=*/true);
}

EXTERN
Expand All @@ -679,8 +683,8 @@ void __kmpc_for_static_init_4u_simple_generic(
PRINT0(LD_IO, "call kmpc_for_static_init_4u_simple_generic\n");
omptarget_nvptx_LoopSupport<uint32_t, int32_t>::for_static_init(
schedtype, plastiter, plower, pupper, pstride, chunk,
/*isSPMDExecutionMode=*/false,
/*IsOMPRuntimeUnavailable=*/true);
/*IsSPMDExecutionMode=*/false,
/*IsRuntimeUninitialized=*/true);
}

EXTERN
Expand All @@ -691,8 +695,8 @@ void __kmpc_for_static_init_8_simple_generic(
PRINT0(LD_IO, "call kmpc_for_static_init_8_simple_generic\n");
omptarget_nvptx_LoopSupport<int64_t, int64_t>::for_static_init(
schedtype, plastiter, plower, pupper, pstride, chunk,
/*isSPMDExecutionMode=*/false,
/*IsOMPRuntimeUnavailable=*/true);
/*IsSPMDExecutionMode=*/false,
/*IsRuntimeUninitialized=*/true);
}

EXTERN
Expand All @@ -703,8 +707,8 @@ void __kmpc_for_static_init_8u_simple_generic(
PRINT0(LD_IO, "call kmpc_for_static_init_8u_simple_generic\n");
omptarget_nvptx_LoopSupport<uint64_t, int64_t>::for_static_init(
schedtype, plastiter, plower, pupper, pstride, chunk,
/*isSPMDExecutionMode=*/false,
/*IsOMPRuntimeUnavailable=*/true);
/*IsSPMDExecutionMode=*/false,
/*IsRuntimeUninitialized=*/true);
}

EXTERN void __kmpc_for_static_fini(kmp_Indent *loc, int32_t global_tid) {
Expand Down

0 comments on commit 17aabf8

Please sign in to comment.