Skip to content

Commit

Permalink
[Libomptarget] Remove debug RAII from libomptarget
Browse files Browse the repository at this point in the history
This feature was supposed to allow you to trace execution inside of
Libomptarget. However, this never really worked properly. The printing
was always reoganized, only worked for single  threads, and pretty much
only told you a handful of things about a runtime library that's an
implementation detail to all users. Despite this, it contributed about
40% of the total filesize of the deviceRTL. This patch simply removes
this functionalit which I think was past due.

Reviewed By: tianshilei1992

Differential Revision: https://reviews.llvm.org/D157001
  • Loading branch information
jhuber6 committed Aug 3, 2023
1 parent 5468340 commit 46642cc
Show file tree
Hide file tree
Showing 13 changed files with 23 additions and 192 deletions.
30 changes: 0 additions & 30 deletions openmp/docs/design/Runtimes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1452,34 +1452,4 @@ to selectively enable and disable different features. Currently, the following
debugging features are supported.

* Enable debugging assertions in the device. ``0x01``
* Enable OpenMP runtime function traces in the device. ``0x2``
* Enable diagnosing common problems during offloading . ``0x4``

.. code-block:: c++

void copy(double *X, double *Y) {
#pragma omp target teams distribute parallel for
for (std::size_t i = 0; i < N; ++i)
Y[i] = X[i];
}
Compiling this code targeting ``nvptx64`` with debugging enabled will
provide the following output from the device runtime library.

.. code-block:: console
$ clang++ -fopenmp -fopenmp-targets=nvptx64 -fopenmp-target-debug=3
$ env LIBOMPTARGET_DEVICE_RTL_DEBUG=3 ./zaxpy
.. code-block:: text
Kernel.cpp:70: Thread 0 Entering int32_t __kmpc_target_init()
Parallelism.cpp:196: Thread 0 Entering int32_t __kmpc_global_thread_num()
Mapping.cpp:239: Thread 0 Entering uint32_t __kmpc_get_hardware_num_threads_in_block()
Workshare.cpp:616: Thread 0 Entering void __kmpc_distribute_static_init_4()
Parallelism.cpp:85: Thread 0 Entering void __kmpc_parallel_51()
Parallelism.cpp:69: Thread 0 Entering <OpenMP Outlined Function>
Workshare.cpp:575: Thread 0 Entering void __kmpc_for_static_init_4()
Workshare.cpp:660: Thread 0 Entering void __kmpc_distribute_static_fini()
Workshare.cpp:660: Thread 0 Entering void __kmpc_distribute_static_fini()
Kernel.cpp:103: Thread 0 Entering void __kmpc_target_deinit()
13 changes: 0 additions & 13 deletions openmp/libomptarget/DeviceRTL/include/Debug.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,17 +42,4 @@ void __assert_fail(const char *expr, const char *msg, const char *file,

///}

/// Enter a debugging scope for performing function traces. Enabled with
/// FunctionTracting set in the debug kind.
#define FunctionTracingRAII() \
DebugEntryRAII Entry(__FILE__, __LINE__, __PRETTY_FUNCTION__);

/// An RAII class for handling entries to debug locations. The current location
/// and function will be printed on entry. Nested levels increase the
/// indentation shown in the debugging output.
struct DebugEntryRAII {
DebugEntryRAII(const char *File, const unsigned Line, const char *Function);
~DebugEntryRAII();
};

#endif
28 changes: 0 additions & 28 deletions openmp/libomptarget/DeviceRTL/src/Debug.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,32 +37,4 @@ void __assert_fail(const char *expr, const char *msg, const char *file,
}
}

DebugEntryRAII::DebugEntryRAII(const char *File, const unsigned Line,
const char *Function) {
if (config::isDebugMode(config::DebugKind::FunctionTracing) &&
mapping::getThreadIdInBlock() == 0 &&
mapping::getBlockIdInKernel() == 0) {

uint16_t &Level =
state::getKernelEnvironment().DynamicEnv->DebugIndentionLevel;

for (int I = 0; I < Level; ++I)
PRINTF("%s", " ");

PRINTF("%s:%u: Thread %u Entering %s\n", File, Line,
mapping::getThreadIdInBlock(), Function);
Level++;
}
}

DebugEntryRAII::~DebugEntryRAII() {
if (config::isDebugMode(config::DebugKind::FunctionTracing) &&
mapping::getThreadIdInBlock() == 0 &&
mapping::getBlockIdInKernel() == 0) {
uint16_t &Level =
state::getKernelEnvironment().DynamicEnv->DebugIndentionLevel;
Level--;
}
}

#pragma omp end declare target
9 changes: 1 addition & 8 deletions openmp/libomptarget/DeviceRTL/src/Kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,6 @@ static void inititializeRuntime(bool IsSPMD,

/// Simple generic state machine for worker threads.
static void genericStateMachine(IdentTy *Ident) {
FunctionTracingRAII();

uint32_t TId = mapping::getThreadIdInBlock();

do {
Expand Down Expand Up @@ -70,7 +68,6 @@ extern "C" {
/// \param Ident Source location identification, can be NULL.
///
int32_t __kmpc_target_init(KernelEnvironmentTy &KernelEnvironment) {
FunctionTracingRAII();
ConfigurationEnvironmentTy &Configuration = KernelEnvironment.Configuration;
bool IsSPMD = Configuration.ExecMode &
llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_SPMD;
Expand Down Expand Up @@ -137,7 +134,6 @@ int32_t __kmpc_target_init(KernelEnvironmentTy &KernelEnvironment) {
/// \param Ident Source location identification, can be NULL.
///
void __kmpc_target_deinit() {
FunctionTracingRAII();
bool IsSPMD = mapping::isSPMDMode();
state::assumeInitialState(IsSPMD);
if (IsSPMD)
Expand All @@ -147,10 +143,7 @@ void __kmpc_target_deinit() {
state::ParallelRegionFn = nullptr;
}

int8_t __kmpc_is_spmd_exec_mode() {
FunctionTracingRAII();
return mapping::isSPMDMode();
}
int8_t __kmpc_is_spmd_exec_mode() { return mapping::isSPMDMode(); }
}

#pragma omp end declare target
3 changes: 0 additions & 3 deletions openmp/libomptarget/DeviceRTL/src/Mapping.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -357,17 +357,14 @@ bool mapping::isGenericMode() { return !isSPMDMode(); }

extern "C" {
__attribute__((noinline)) uint32_t __kmpc_get_hardware_thread_id_in_block() {
FunctionTracingRAII();
return mapping::getThreadIdInBlock();
}

__attribute__((noinline)) uint32_t __kmpc_get_hardware_num_threads_in_block() {
FunctionTracingRAII();
return impl::getNumberOfThreadsInBlock(mapping::DIM_X);
}

__attribute__((noinline)) uint32_t __kmpc_get_warp_size() {
FunctionTracingRAII();
return impl::getWarpSize();
}
}
Expand Down
10 changes: 2 additions & 8 deletions openmp/libomptarget/DeviceRTL/src/Misc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,15 +77,9 @@ double getWTime() {
///{

extern "C" {
int32_t __kmpc_cancellationpoint(IdentTy *, int32_t, int32_t) {
FunctionTracingRAII();
return 0;
}
int32_t __kmpc_cancellationpoint(IdentTy *, int32_t, int32_t) { return 0; }

int32_t __kmpc_cancel(IdentTy *, int32_t, int32_t) {
FunctionTracingRAII();
return 0;
}
int32_t __kmpc_cancel(IdentTy *, int32_t, int32_t) { return 0; }

double omp_get_wtick(void) { return ompx::impl::getWTick(); }

Expand Down
23 changes: 4 additions & 19 deletions openmp/libomptarget/DeviceRTL/src/Parallelism.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@ uint32_t determineNumberOfThreads(int32_t NumThreadsClause) {
// Invoke an outlined parallel function unwrapping arguments (up to 32).
void invokeMicrotask(int32_t global_tid, int32_t bound_tid, void *fn,
void **args, int64_t nargs) {
DebugEntryRAII Entry(__FILE__, __LINE__, "<OpenMP Outlined Function>");
switch (nargs) {
#include "generated_microtask_cases.gen"
default:
Expand All @@ -86,8 +85,6 @@ extern "C" {
void __kmpc_parallel_51(IdentTy *ident, int32_t, int32_t if_expr,
int32_t num_threads, int proc_bind, void *fn,
void *wrapper_fn, void **args, int64_t nargs) {
FunctionTracingRAII();

uint32_t TId = mapping::getThreadIdInBlock();

// Assert the parallelism level is zero if disabled by the user.
Expand Down Expand Up @@ -264,7 +261,6 @@ void __kmpc_parallel_51(IdentTy *ident, int32_t, int32_t if_expr,

__attribute__((noinline)) bool
__kmpc_kernel_parallel(ParallelRegionFnTy *WorkFn) {
FunctionTracingRAII();
// Work function and arguments for L1 parallel region.
*WorkFn = state::ParallelRegionFn;

Expand All @@ -279,7 +275,6 @@ __kmpc_kernel_parallel(ParallelRegionFnTy *WorkFn) {
}

__attribute__((noinline)) void __kmpc_kernel_end_parallel() {
FunctionTracingRAII();
// In case we have modified an ICV for this thread before a ThreadState was
// created. We drop it now to not contaminate the next parallel region.
ASSERT(!mapping::isSPMDMode(), nullptr);
Expand All @@ -288,24 +283,14 @@ __attribute__((noinline)) void __kmpc_kernel_end_parallel() {
ASSERT(!mapping::isSPMDMode(), nullptr);
}

uint16_t __kmpc_parallel_level(IdentTy *, uint32_t) {
FunctionTracingRAII();
return omp_get_level();
}
uint16_t __kmpc_parallel_level(IdentTy *, uint32_t) { return omp_get_level(); }

int32_t __kmpc_global_thread_num(IdentTy *) {
FunctionTracingRAII();
return omp_get_thread_num();
}
int32_t __kmpc_global_thread_num(IdentTy *) { return omp_get_thread_num(); }

void __kmpc_push_num_teams(IdentTy *loc, int32_t tid, int32_t num_teams,
int32_t thread_limit) {
FunctionTracingRAII();
}
int32_t thread_limit) {}

void __kmpc_push_proc_bind(IdentTy *loc, uint32_t tid, int proc_bind) {
FunctionTracingRAII();
}
void __kmpc_push_proc_bind(IdentTy *loc, uint32_t tid, int proc_bind) {}
}

#pragma omp end declare target
7 changes: 2 additions & 5 deletions openmp/libomptarget/DeviceRTL/src/Reduction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,6 @@ extern "C" {
int32_t __kmpc_nvptx_parallel_reduce_nowait_v2(
IdentTy *Loc, int32_t TId, int32_t num_vars, uint64_t reduce_size,
void *reduce_data, ShuffleReductFnTy shflFct, InterWarpCopyFnTy cpyFct) {
FunctionTracingRAII();
return nvptx_parallel_reduce_nowait(TId, num_vars, reduce_size, reduce_data,
shflFct, cpyFct, mapping::isSPMDMode(),
false);
Expand All @@ -187,8 +186,6 @@ int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
void *reduce_data, ShuffleReductFnTy shflFct, InterWarpCopyFnTy cpyFct,
ListGlobalFnTy lgcpyFct, ListGlobalFnTy lgredFct, ListGlobalFnTy glcpyFct,
ListGlobalFnTy glredFct) {
FunctionTracingRAII();

// Terminate all threads in non-SPMD mode except for the master thread.
uint32_t ThreadId = mapping::getThreadIdInBlock();
if (mapping::isGenericMode()) {
Expand Down Expand Up @@ -311,9 +308,9 @@ int32_t __kmpc_nvptx_teams_reduce_nowait_v2(
return 0;
}

void __kmpc_nvptx_end_reduce(int32_t TId) { FunctionTracingRAII(); }
void __kmpc_nvptx_end_reduce(int32_t TId) {}

void __kmpc_nvptx_end_reduce_nowait(int32_t TId) { FunctionTracingRAII(); }
void __kmpc_nvptx_end_reduce_nowait(int32_t TId) {}
}

#pragma omp end declare target
5 changes: 0 additions & 5 deletions openmp/libomptarget/DeviceRTL/src/State.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -424,12 +424,10 @@ int omp_get_initial_device(void) { return -1; }

extern "C" {
__attribute__((noinline)) void *__kmpc_alloc_shared(uint64_t Bytes) {
FunctionTracingRAII();
return memory::allocShared(Bytes, "Frontend alloc shared");
}

__attribute__((noinline)) void __kmpc_free_shared(void *Ptr, uint64_t Bytes) {
FunctionTracingRAII();
memory::freeShared(Ptr, Bytes, "Frontend free shared");
}

Expand All @@ -455,7 +453,6 @@ constexpr uint64_t NUM_SHARED_VARIABLES_IN_SHARED_MEM = 64;
allocator(omp_pteam_mem_alloc)

void __kmpc_begin_sharing_variables(void ***GlobalArgs, uint64_t nArgs) {
FunctionTracingRAII();
if (nArgs <= NUM_SHARED_VARIABLES_IN_SHARED_MEM) {
SharedMemVariableSharingSpacePtr = &SharedMemVariableSharingSpace[0];
} else {
Expand All @@ -468,13 +465,11 @@ void __kmpc_begin_sharing_variables(void ***GlobalArgs, uint64_t nArgs) {
}

void __kmpc_end_sharing_variables() {
FunctionTracingRAII();
if (SharedMemVariableSharingSpacePtr != &SharedMemVariableSharingSpace[0])
memory::freeGlobal(SharedMemVariableSharingSpacePtr, "new extended args");
}

void __kmpc_get_shared_variables(void ***GlobalArgs) {
FunctionTracingRAII();
*GlobalArgs = SharedMemVariableSharingSpacePtr;
}
}
Expand Down
33 changes: 7 additions & 26 deletions openmp/libomptarget/DeviceRTL/src/Synchronization.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -504,18 +504,16 @@ void unsetCriticalLock(omp_lock_t *Lock) { impl::unsetLock(Lock); }
void setCriticalLock(omp_lock_t *Lock) { impl::setLock(Lock); }

extern "C" {
void __kmpc_ordered(IdentTy *Loc, int32_t TId) { FunctionTracingRAII(); }
void __kmpc_ordered(IdentTy *Loc, int32_t TId) {}

void __kmpc_end_ordered(IdentTy *Loc, int32_t TId) { FunctionTracingRAII(); }
void __kmpc_end_ordered(IdentTy *Loc, int32_t TId) {}

int32_t __kmpc_cancel_barrier(IdentTy *Loc, int32_t TId) {
FunctionTracingRAII();
__kmpc_barrier(Loc, TId);
return 0;
}

void __kmpc_barrier(IdentTy *Loc, int32_t TId) {
FunctionTracingRAII();
if (mapping::isMainThreadInGenericMode())
return __kmpc_flush(Loc);

Expand All @@ -527,62 +525,45 @@ void __kmpc_barrier(IdentTy *Loc, int32_t TId) {

__attribute__((noinline)) void __kmpc_barrier_simple_spmd(IdentTy *Loc,
int32_t TId) {
FunctionTracingRAII();
synchronize::threadsAligned(atomic::OrderingTy::seq_cst);
}

__attribute__((noinline)) void __kmpc_barrier_simple_generic(IdentTy *Loc,
int32_t TId) {
FunctionTracingRAII();
synchronize::threads(atomic::OrderingTy::seq_cst);
}

int32_t __kmpc_master(IdentTy *Loc, int32_t TId) {
FunctionTracingRAII();
return omp_get_thread_num() == 0;
}

void __kmpc_end_master(IdentTy *Loc, int32_t TId) { FunctionTracingRAII(); }
void __kmpc_end_master(IdentTy *Loc, int32_t TId) {}

int32_t __kmpc_masked(IdentTy *Loc, int32_t TId, int32_t Filter) {
FunctionTracingRAII();
return omp_get_thread_num() == Filter;
}

void __kmpc_end_masked(IdentTy *Loc, int32_t TId) { FunctionTracingRAII(); }
void __kmpc_end_masked(IdentTy *Loc, int32_t TId) {}

int32_t __kmpc_single(IdentTy *Loc, int32_t TId) {
FunctionTracingRAII();
return __kmpc_master(Loc, TId);
}

void __kmpc_end_single(IdentTy *Loc, int32_t TId) {
FunctionTracingRAII();
// The barrier is explicitly called.
}

void __kmpc_flush(IdentTy *Loc) {
FunctionTracingRAII();
fence::kernel(atomic::seq_cst);
}
void __kmpc_flush(IdentTy *Loc) { fence::kernel(atomic::seq_cst); }

uint64_t __kmpc_warp_active_thread_mask(void) {
FunctionTracingRAII();
return mapping::activemask();
}
uint64_t __kmpc_warp_active_thread_mask(void) { return mapping::activemask(); }

void __kmpc_syncwarp(uint64_t Mask) {
FunctionTracingRAII();
synchronize::warp(Mask);
}
void __kmpc_syncwarp(uint64_t Mask) { synchronize::warp(Mask); }

void __kmpc_critical(IdentTy *Loc, int32_t TId, CriticalNameTy *Name) {
FunctionTracingRAII();
impl::setCriticalLock(reinterpret_cast<omp_lock_t *>(Name));
}

void __kmpc_end_critical(IdentTy *Loc, int32_t TId, CriticalNameTy *Name) {
FunctionTracingRAII();
impl::unsetCriticalLock(reinterpret_cast<omp_lock_t *>(Name));
}

Expand Down
Loading

0 comments on commit 46642cc

Please sign in to comment.