Skip to content

Commit

Permalink
[OpenMP] Remove checkXXXX device runtime functions
Browse files Browse the repository at this point in the history
We had multiple functions to determine the execution mode (SPMD/Generic)
and runtime status (initialized/uninitialized) but that just increased
complexity without a real benefit. Especially with D102307 in mind it
is helpful to reduce the dependence on the `ident_t` flags.

Differential Revision: https://reviews.llvm.org/D105586
  • Loading branch information
jdoerfert committed Jul 10, 2021
1 parent 9663427 commit e603ca0
Show file tree
Hide file tree
Showing 7 changed files with 39 additions and 99 deletions.
24 changes: 12 additions & 12 deletions openmp/libomptarget/deviceRTLs/common/src/loop.cu
Original file line number Diff line number Diff line change
Expand Up @@ -204,15 +204,15 @@ public:
INLINE static void dispatch_init(kmp_Ident *loc, int32_t threadId,
kmp_sched_t schedule, T lb, T ub, ST st,
ST chunk) {
if (checkRuntimeUninitialized(loc)) {
if (isRuntimeUninitialized()) {
// In SPMD mode no need to check parallelism level - dynamic scheduling
// may appear only in L2 parallel regions with lightweight runtime.
ASSERT0(LT_FUSSY, checkSPMDMode(loc), "Expected non-SPMD mode.");
ASSERT0(LT_FUSSY, __kmpc_is_spmd_exec_mode(), "Expected non-SPMD mode.");
return;
}
int tid = GetLogicalThreadIdInBlock(checkSPMDMode(loc));
int tid = GetLogicalThreadIdInBlock(__kmpc_is_spmd_exec_mode());
omptarget_nvptx_TaskDescr *currTaskDescr = getMyTopTaskDescriptor(tid);
T tnum = GetNumberOfOmpThreads(checkSPMDMode(loc));
T tnum = GetNumberOfOmpThreads(__kmpc_is_spmd_exec_mode());
T tripCount = ub - lb + 1; // +1 because ub is inclusive
ASSERT0(LT_FUSSY, threadId < tnum,
"current thread is not needed here; error");
Expand Down Expand Up @@ -441,10 +441,10 @@ public:

INLINE static int dispatch_next(kmp_Ident *loc, int32_t gtid, int32_t *plast,
T *plower, T *pupper, ST *pstride) {
if (checkRuntimeUninitialized(loc)) {
if (isRuntimeUninitialized()) {
// In SPMD mode no need to check parallelism level - dynamic scheduling
// may appear only in L2 parallel regions with lightweight runtime.
ASSERT0(LT_FUSSY, checkSPMDMode(loc), "Expected non-SPMD mode.");
ASSERT0(LT_FUSSY, __kmpc_is_spmd_exec_mode(), "Expected non-SPMD mode.");
if (*plast)
return DISPATCH_FINISHED;
*plast = 1;
Expand All @@ -453,8 +453,8 @@ public:
// ID of a thread in its own warp

// automatically selects thread or warp ID based on selected implementation
int tid = GetLogicalThreadIdInBlock(checkSPMDMode(loc));
ASSERT0(LT_FUSSY, gtid < GetNumberOfOmpThreads(checkSPMDMode(loc)),
int tid = GetLogicalThreadIdInBlock(__kmpc_is_spmd_exec_mode());
ASSERT0(LT_FUSSY, gtid < GetNumberOfOmpThreads(__kmpc_is_spmd_exec_mode()),
"current thread is not needed here; error");
// retrieve schedule
kmp_sched_t schedule =
Expand Down Expand Up @@ -624,7 +624,7 @@ EXTERN void __kmpc_for_static_init_4(kmp_Ident *loc, int32_t global_tid,
PRINT0(LD_IO, "call kmpc_for_static_init_4\n");
omptarget_nvptx_LoopSupport<int32_t, int32_t>::for_static_init(
global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
checkSPMDMode(loc));
__kmpc_is_spmd_exec_mode());
}

EXTERN void __kmpc_for_static_init_4u(kmp_Ident *loc, int32_t global_tid,
Expand All @@ -635,7 +635,7 @@ EXTERN void __kmpc_for_static_init_4u(kmp_Ident *loc, int32_t global_tid,
PRINT0(LD_IO, "call kmpc_for_static_init_4u\n");
omptarget_nvptx_LoopSupport<uint32_t, int32_t>::for_static_init(
global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
checkSPMDMode(loc));
__kmpc_is_spmd_exec_mode());
}

EXTERN void __kmpc_for_static_init_8(kmp_Ident *loc, int32_t global_tid,
Expand All @@ -646,7 +646,7 @@ EXTERN void __kmpc_for_static_init_8(kmp_Ident *loc, int32_t global_tid,
PRINT0(LD_IO, "call kmpc_for_static_init_8\n");
omptarget_nvptx_LoopSupport<int64_t, int64_t>::for_static_init(
global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
checkSPMDMode(loc));
__kmpc_is_spmd_exec_mode());
}

EXTERN void __kmpc_for_static_init_8u(kmp_Ident *loc, int32_t global_tid,
Expand All @@ -657,7 +657,7 @@ EXTERN void __kmpc_for_static_init_8u(kmp_Ident *loc, int32_t global_tid,
PRINT0(LD_IO, "call kmpc_for_static_init_8u\n");
omptarget_nvptx_LoopSupport<uint64_t, int64_t>::for_static_init(
global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
checkSPMDMode(loc));
__kmpc_is_spmd_exec_mode());
}

EXTERN
Expand Down
20 changes: 10 additions & 10 deletions openmp/libomptarget/deviceRTLs/common/src/parallel.cu
Original file line number Diff line number Diff line change
Expand Up @@ -181,14 +181,14 @@ EXTERN void __kmpc_serialized_parallel(kmp_Ident *loc, uint32_t global_tid) {

IncParallelLevel(/*ActiveParallel=*/false, __kmpc_impl_activemask());

if (checkRuntimeUninitialized(loc)) {
ASSERT0(LT_FUSSY, checkSPMDMode(loc),
if (isRuntimeUninitialized()) {
ASSERT0(LT_FUSSY, __kmpc_is_spmd_exec_mode(),
"Expected SPMD mode with uninitialized runtime.");
return;
}

// assume this is only called for nested parallel
int threadId = GetLogicalThreadIdInBlock(checkSPMDMode(loc));
int threadId = GetLogicalThreadIdInBlock(__kmpc_is_spmd_exec_mode());

// unlike actual parallel, threads in the same team do not share
// the workTaskDescr in this case and num threads is fixed to 1
Expand Down Expand Up @@ -220,14 +220,14 @@ EXTERN void __kmpc_end_serialized_parallel(kmp_Ident *loc,

DecParallelLevel(/*ActiveParallel=*/false, __kmpc_impl_activemask());

if (checkRuntimeUninitialized(loc)) {
ASSERT0(LT_FUSSY, checkSPMDMode(loc),
if (isRuntimeUninitialized()) {
ASSERT0(LT_FUSSY, __kmpc_is_spmd_exec_mode(),
"Expected SPMD mode with uninitialized runtime.");
return;
}

// pop stack
int threadId = GetLogicalThreadIdInBlock(checkSPMDMode(loc));
int threadId = GetLogicalThreadIdInBlock(__kmpc_is_spmd_exec_mode());
omptarget_nvptx_TaskDescr *currTaskDescr = getMyTopTaskDescriptor(threadId);
// set new top
omptarget_nvptx_threadPrivateContext->SetTopLevelTaskDescr(
Expand All @@ -249,8 +249,8 @@ EXTERN uint16_t __kmpc_parallel_level(kmp_Ident *loc, uint32_t global_tid) {
// it's cheap to recalculate this value so we never use the result
// of this call.
EXTERN int32_t __kmpc_global_thread_num(kmp_Ident *loc) {
int tid = GetLogicalThreadIdInBlock(checkSPMDMode(loc));
return GetOmpThreadId(tid, checkSPMDMode(loc));
int tid = GetLogicalThreadIdInBlock(__kmpc_is_spmd_exec_mode());
return GetOmpThreadId(tid, __kmpc_is_spmd_exec_mode());
}

////////////////////////////////////////////////////////////////////////////////
Expand All @@ -260,9 +260,9 @@ EXTERN int32_t __kmpc_global_thread_num(kmp_Ident *loc) {
EXTERN void __kmpc_push_num_threads(kmp_Ident *loc, int32_t tid,
int32_t num_threads) {
PRINT(LD_IO, "call kmpc_push_num_threads %d\n", num_threads);
ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc),
ASSERT0(LT_FUSSY, isRuntimeInitialized(),
"Runtime must be initialized.");
tid = GetLogicalThreadIdInBlock(checkSPMDMode(loc));
tid = GetLogicalThreadIdInBlock(__kmpc_is_spmd_exec_mode());
omptarget_nvptx_threadPrivateContext->NumThreadsForNextParallel(tid) =
num_threads;
}
Expand Down
12 changes: 6 additions & 6 deletions openmp/libomptarget/deviceRTLs/common/src/reduction.cu
Original file line number Diff line number Diff line change
Expand Up @@ -159,11 +159,11 @@ int32_t __kmpc_nvptx_parallel_reduce_nowait_v2(
kmp_InterWarpCopyFctPtr cpyFct) {
return nvptx_parallel_reduce_nowait(
global_tid, num_vars, reduce_size, reduce_data, shflFct, cpyFct,
checkSPMDMode(loc), checkRuntimeUninitialized(loc));
__kmpc_is_spmd_exec_mode(), isRuntimeUninitialized());
}

INLINE static bool isMaster(kmp_Ident *loc, uint32_t ThreadId) {
return checkGenericMode(loc) || IsTeamMaster(ThreadId);
return !__kmpc_is_spmd_exec_mode() || IsTeamMaster(ThreadId);
}

INLINE static uint32_t roundToWarpsize(uint32_t s) {
Expand All @@ -184,16 +184,16 @@ EXTERN int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
kmp_ListGlobalFctPtr glredFct) {

// Terminate all threads in non-SPMD mode except for the master thread.
if (checkGenericMode(loc) && GetThreadIdInBlock() != GetMasterThreadID())
if (!__kmpc_is_spmd_exec_mode() && GetThreadIdInBlock() != GetMasterThreadID())
return 0;

uint32_t ThreadId = GetLogicalThreadIdInBlock(checkSPMDMode(loc));
uint32_t ThreadId = GetLogicalThreadIdInBlock(__kmpc_is_spmd_exec_mode());

// In non-generic mode all workers participate in the teams reduction.
// In generic mode only the team master participates in the teams
// reduction because the workers are waiting for parallel work.
uint32_t NumThreads =
checkSPMDMode(loc) ? GetNumberOfOmpThreads(/*isSPMDExecutionMode=*/true)
__kmpc_is_spmd_exec_mode() ? GetNumberOfOmpThreads(/*isSPMDExecutionMode=*/true)
: /*Master thread only*/ 1;
uint32_t TeamId = GetBlockIdInKernel();
uint32_t NumTeams = GetNumberOfBlocksInKernel();
Expand Down Expand Up @@ -225,7 +225,7 @@ EXTERN int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
ChunkTeamCount = __kmpc_atomic_inc((uint32_t *)&Cnt, num_of_records - 1u);
}
// Synchronize
if (checkSPMDMode(loc))
if (__kmpc_is_spmd_exec_mode())
__kmpc_barrier(loc, global_tid);

// reduce_data is global or shared so before being reduced within the
Expand Down
51 changes: 0 additions & 51 deletions openmp/libomptarget/deviceRTLs/common/src/support.cu
Original file line number Diff line number Diff line change
Expand Up @@ -34,57 +34,6 @@ bool isRuntimeInitialized() {
return (execution_param & RuntimeMask) == RuntimeInitialized;
}

////////////////////////////////////////////////////////////////////////////////
// Execution Modes based on location parameter fields
////////////////////////////////////////////////////////////////////////////////

// Decide whether execution is in SPMD mode, preferring the flags stored in
// `loc->reserved_2` and falling back to the runtime query
// `__kmpc_is_spmd_exec_mode()` whenever the flags give no definite answer.
//
// \param loc Source location descriptor; may be null.
// \return true when SPMD mode is detected, false otherwise.
bool checkSPMDMode(kmp_Ident *loc) {
  if (loc) {
    // The SPMD flag on its own is conclusive: we cannot be in the
    // undefined state.
    if (loc->reserved_2 & KMP_IDENT_SPMD_MODE)
      return true;

    // Neither the SPMD flag nor the simple-runtime flag is set. That is a
    // valid flag combination (non-SPMD with the runtime required), so the
    // answer is known without asking the runtime.
    if (!(loc->reserved_2 & KMP_IDENT_SIMPLE_RT_MODE))
      return false;
  }

  // Either no location was supplied, or the flags are in the undefined
  // state (simple-runtime flag set without the SPMD flag): ask the runtime.
  return __kmpc_is_spmd_exec_mode();
}

// Generic mode is defined as the logical negation of SPMD mode for the same
// location descriptor.
bool checkGenericMode(kmp_Ident *loc) {
  if (checkSPMDMode(loc))
    return false;
  return true;
}

// Decide whether the device runtime counts as uninitialized, preferring the
// flags stored in `loc->reserved_2` and falling back to
// `isRuntimeUninitialized()` whenever the flags give no definite answer.
//
// \param loc Source location descriptor; may be null.
// \return true when the runtime is considered uninitialized.
bool checkRuntimeUninitialized(kmp_Ident *loc) {
  if (loc) {
    // Simple-runtime flag clear: the runtime is required, so we cannot be
    // in the undefined state and the runtime is reported as initialized.
    if (!(loc->reserved_2 & KMP_IDENT_SIMPLE_RT_MODE))
      return false;

    // Simple-runtime flag set together with the SPMD flag: the runtime is
    // reported as uninitialized.
    if (loc->reserved_2 & KMP_IDENT_SPMD_MODE)
      return true;
  }

  // Either no location was supplied, or the flags are in the undefined
  // state: non-SPMD combined with no runtime required, a combination that
  // cannot actually happen and is used to mark orphaned functions. Defer to
  // the actual runtime state.
  return isRuntimeUninitialized();
}

// The runtime counts as initialized exactly when
// checkRuntimeUninitialized() reports false for the same location.
bool checkRuntimeInitialized(kmp_Ident *loc) {
  if (checkRuntimeUninitialized(loc))
    return false;
  return true;
}

////////////////////////////////////////////////////////////////////////////////
// support: get info from machine
////////////////////////////////////////////////////////////////////////////////
Expand Down
10 changes: 5 additions & 5 deletions openmp/libomptarget/deviceRTLs/common/src/sync.cu
Original file line number Diff line number Diff line change
Expand Up @@ -42,16 +42,16 @@ EXTERN int32_t __kmpc_cancel_barrier(kmp_Ident *loc_ref, int32_t tid) {
}

EXTERN void __kmpc_barrier(kmp_Ident *loc_ref, int32_t tid) {
if (checkRuntimeUninitialized(loc_ref)) {
ASSERT0(LT_FUSSY, checkSPMDMode(loc_ref),
if (isRuntimeUninitialized()) {
ASSERT0(LT_FUSSY, __kmpc_is_spmd_exec_mode(),
"Expected SPMD mode with uninitialized runtime.");
__kmpc_barrier_simple_spmd(loc_ref, tid);
} else {
tid = GetLogicalThreadIdInBlock(checkSPMDMode(loc_ref));
tid = GetLogicalThreadIdInBlock(__kmpc_is_spmd_exec_mode());
int numberOfActiveOMPThreads =
GetNumberOfOmpThreads(checkSPMDMode(loc_ref));
GetNumberOfOmpThreads(__kmpc_is_spmd_exec_mode());
if (numberOfActiveOMPThreads > 1) {
if (checkSPMDMode(loc_ref)) {
if (__kmpc_is_spmd_exec_mode()) {
__kmpc_barrier_simple_spmd(loc_ref, tid);
} else {
// The #threads parameter must be rounded up to the WARPSIZE.
Expand Down
12 changes: 6 additions & 6 deletions openmp/libomptarget/deviceRTLs/common/src/task.cu
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ EXTERN int32_t __kmpc_omp_task_with_deps(kmp_Ident *loc, uint32_t global_tid,
void *noAliasDepList) {
PRINT(LD_IO, "call to __kmpc_omp_task_with_deps(task 0x%llx)\n",
P64(newKmpTaskDescr));
ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc),
ASSERT0(LT_FUSSY, isRuntimeInitialized(),
"Runtime must be initialized.");
// 1. get explicit task descr from kmp task descr
omptarget_nvptx_ExplicitTaskDescr *newExplicitTaskDescr =
Expand All @@ -96,7 +96,7 @@ EXTERN int32_t __kmpc_omp_task_with_deps(kmp_Ident *loc, uint32_t global_tid,
"bad assumptions");

// 2. push new context: update new task descriptor
int tid = GetLogicalThreadIdInBlock(checkSPMDMode(loc));
int tid = GetLogicalThreadIdInBlock(__kmpc_is_spmd_exec_mode());
omptarget_nvptx_TaskDescr *parentTaskDescr = getMyTopTaskDescriptor(tid);
newTaskDescr->CopyForExplicitTask(parentTaskDescr);
// set new task descriptor as top
Expand All @@ -122,7 +122,7 @@ EXTERN void __kmpc_omp_task_begin_if0(kmp_Ident *loc, uint32_t global_tid,
kmp_TaskDescr *newKmpTaskDescr) {
PRINT(LD_IO, "call to __kmpc_omp_task_begin_if0(task 0x%llx)\n",
(unsigned long long)newKmpTaskDescr);
ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc),
ASSERT0(LT_FUSSY, isRuntimeInitialized(),
"Runtime must be initialized.");
// 1. get explicit task descr from kmp task descr
omptarget_nvptx_ExplicitTaskDescr *newExplicitTaskDescr =
Expand All @@ -135,7 +135,7 @@ EXTERN void __kmpc_omp_task_begin_if0(kmp_Ident *loc, uint32_t global_tid,
"bad assumptions");

// 2. push new context: update new task descriptor
int tid = GetLogicalThreadIdInBlock(checkSPMDMode(loc));
int tid = GetLogicalThreadIdInBlock(__kmpc_is_spmd_exec_mode());
omptarget_nvptx_TaskDescr *parentTaskDescr = getMyTopTaskDescriptor(tid);
newTaskDescr->CopyForExplicitTask(parentTaskDescr);
// set new task descriptor as top
Expand All @@ -148,7 +148,7 @@ EXTERN void __kmpc_omp_task_complete_if0(kmp_Ident *loc, uint32_t global_tid,
kmp_TaskDescr *newKmpTaskDescr) {
PRINT(LD_IO, "call to __kmpc_omp_task_complete_if0(task 0x%llx)\n",
(unsigned long long)newKmpTaskDescr);
ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc),
ASSERT0(LT_FUSSY, isRuntimeInitialized(),
"Runtime must be initialized.");
// 1. get explicit task descr from kmp task descr
omptarget_nvptx_ExplicitTaskDescr *newExplicitTaskDescr =
Expand All @@ -163,7 +163,7 @@ EXTERN void __kmpc_omp_task_complete_if0(kmp_Ident *loc, uint32_t global_tid,
omptarget_nvptx_TaskDescr *parentTaskDescr = newTaskDescr->GetPrevTaskDescr();
// 3... nothing to call... is inline
// 4. pop context
int tid = GetLogicalThreadIdInBlock(checkSPMDMode(loc));
int tid = GetLogicalThreadIdInBlock(__kmpc_is_spmd_exec_mode());
omptarget_nvptx_threadPrivateContext->SetTopLevelTaskDescr(tid,
parentTaskDescr);
// 5. free
Expand Down
9 changes: 0 additions & 9 deletions openmp/libomptarget/deviceRTLs/common/support.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,6 @@ bool isGenericMode();
bool isRuntimeUninitialized();
bool isRuntimeInitialized();

////////////////////////////////////////////////////////////////////////////////
// Execution Modes based on location parameter fields
////////////////////////////////////////////////////////////////////////////////

bool checkSPMDMode(kmp_Ident *loc);
bool checkGenericMode(kmp_Ident *loc);
bool checkRuntimeUninitialized(kmp_Ident *loc);
bool checkRuntimeInitialized(kmp_Ident *loc);

////////////////////////////////////////////////////////////////////////////////
// get info from machine
////////////////////////////////////////////////////////////////////////////////
Expand Down

0 comments on commit e603ca0

Please sign in to comment.