Skip to content

Commit

Permalink
[OpenMP][OMPT] Introduce a guard to handle OMPT return address
Browse files Browse the repository at this point in the history
This is an alternative approach to address inconsistencies pointed out in: D90078
This patch makes sure that the return address is reset, when leaving the scope.
In some cases, I had to move the macro out of an if-statement to have it in the
right scope, in some cases I added an additional block to restrict the scope.

This patch does not handle inconsistencies, which might occur if the return
address is still set when we call into the application.

Test case (repeated_calls.c) provided by @hbae

Differential Revision: https://reviews.llvm.org/D91692
  • Loading branch information
jprotze committed Nov 25, 2020
1 parent b281a05 commit 6d3b816
Show file tree
Hide file tree
Showing 4 changed files with 162 additions and 30 deletions.
27 changes: 13 additions & 14 deletions openmp/runtime/src/kmp_csupport.cpp
Expand Up @@ -297,8 +297,8 @@ void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
}
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
OMPT_STORE_RETURN_ADDRESS(gtid);
}
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

#if INCLUDE_SSC_MARKS
Expand Down Expand Up @@ -713,8 +713,8 @@ void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
if (ompt_frame->enter_frame.ptr == NULL)
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
OMPT_STORE_RETURN_ADDRESS(global_tid);
}
OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
__kmp_threads[global_tid]->th.th_ident = loc;
// TODO: explicit barrier_wait_id:
Expand Down Expand Up @@ -851,8 +851,8 @@ void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
kmp_team_t *team;
ompt_wait_id_t lck;
void *codeptr_ra;
OMPT_STORE_RETURN_ADDRESS(gtid);
if (ompt_enabled.enabled) {
OMPT_STORE_RETURN_ADDRESS(gtid);
team = __kmp_team_from_gtid(gtid);
lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
/* OMPT state update */
Expand Down Expand Up @@ -1607,8 +1607,8 @@ kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
if (ompt_frame->enter_frame.ptr == NULL)
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
OMPT_STORE_RETURN_ADDRESS(global_tid);
}
OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
#if USE_ITT_NOTIFY
__kmp_threads[global_tid]->th.th_ident = loc;
Expand Down Expand Up @@ -1671,8 +1671,8 @@ kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
if (ompt_frame->enter_frame.ptr == NULL)
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
OMPT_STORE_RETURN_ADDRESS(global_tid);
}
OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
#if USE_ITT_NOTIFY
__kmp_threads[global_tid]->th.th_ident = loc;
Expand Down Expand Up @@ -2069,8 +2069,8 @@ void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
if (ompt_frame->enter_frame.ptr == NULL)
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
OMPT_STORE_RETURN_ADDRESS(gtid);
}
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
/* This barrier is not a barrier region boundary */
#if USE_ITT_NOTIFY
Expand All @@ -2083,11 +2083,9 @@ void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,

// Consider next barrier a user-visible barrier for barrier region boundaries
// Nesting checks are already handled by the single construct checks

{
#if OMPT_SUPPORT
if (ompt_enabled.enabled) {
OMPT_STORE_RETURN_ADDRESS(gtid);
}
#endif
#if USE_ITT_NOTIFY
__kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
Expand All @@ -2099,6 +2097,7 @@ void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
ompt_frame->enter_frame = ompt_data_none;
}
#endif
}
}

/* -------------------------------------------------------------------------- */
Expand Down Expand Up @@ -3462,8 +3461,8 @@ __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
if (ompt_frame->enter_frame.ptr == NULL)
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
OMPT_STORE_RETURN_ADDRESS(global_tid);
}
OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
#if USE_ITT_NOTIFY
__kmp_threads[global_tid]->th.th_ident = loc;
Expand Down Expand Up @@ -3651,8 +3650,8 @@ kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
if (ompt_frame->enter_frame.ptr == NULL)
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
OMPT_STORE_RETURN_ADDRESS(global_tid);
}
OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
#if USE_ITT_NOTIFY
__kmp_threads[global_tid]->th.th_ident =
Expand Down Expand Up @@ -3733,8 +3732,8 @@ void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
if (ompt_frame->enter_frame.ptr == NULL)
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
OMPT_STORE_RETURN_ADDRESS(global_tid);
}
OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
#if USE_ITT_NOTIFY
__kmp_threads[global_tid]->th.th_ident = loc;
Expand All @@ -3759,8 +3758,8 @@ void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
if (ompt_frame->enter_frame.ptr == NULL)
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
OMPT_STORE_RETURN_ADDRESS(global_tid);
}
OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
#if USE_ITT_NOTIFY
__kmp_threads[global_tid]->th.th_ident = loc;
Expand All @@ -3780,8 +3779,8 @@ void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
if (ompt_frame->enter_frame.ptr == NULL)
ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
OMPT_STORE_RETURN_ADDRESS(global_tid);
}
OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
// TODO: implicit barrier: should be exposed
#if USE_ITT_NOTIFY
Expand Down
38 changes: 24 additions & 14 deletions openmp/runtime/src/kmp_gsupport.cpp
Expand Up @@ -573,13 +573,17 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)(void) {
gtid, lb, ub, str, chunk_sz)); \
\
if ((str > 0) ? (lb < ub) : (lb > ub)) { \
IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \
KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \
(str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \
(schedule) != kmp_sch_static); \
IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \
status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \
(kmp_int *)p_ub, (kmp_int *)&stride); \
{ \
IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \
KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \
(str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \
(schedule) != kmp_sch_static); \
} \
{ \
IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \
status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \
(kmp_int *)p_ub, (kmp_int *)&stride); \
} \
if (status) { \
KMP_DEBUG_ASSERT(stride == str); \
*p_ub += (str > 0) ? 1 : -1; \
Expand Down Expand Up @@ -609,12 +613,17 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)(void) {
gtid, lb, ub, str, chunk_sz)); \
\
if ((str > 0) ? (lb < ub) : (lb > ub)) { \
IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \
KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \
(str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, TRUE); \
IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \
status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \
(kmp_int *)p_ub, (kmp_int *)&stride); \
{ \
IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \
KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \
(str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \
TRUE); \
} \
{ \
IF_OMPT_SUPPORT(OMPT_STORE_RETURN_ADDRESS(gtid);) \
status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \
(kmp_int *)p_ub, (kmp_int *)&stride); \
} \
if (status) { \
KMP_DEBUG_ASSERT(stride == str); \
*p_ub += (str > 0) ? 1 : -1; \
Expand Down Expand Up @@ -1482,12 +1491,13 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_SECTIONS)(void (*task)(void *),
task, data, num_threads, &loc, kmp_nm_dynamic_chunked,
(kmp_int)1, (kmp_int)count, (kmp_int)1, (kmp_int)1);

{
#if OMPT_SUPPORT
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif

KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1, TRUE);

}
task(data);
KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)();
KA_TRACE(20, ("GOMP_parallel_sections exit: T#%d\n", gtid));
Expand Down
25 changes: 23 additions & 2 deletions openmp/runtime/src/ompt-specific.h
Expand Up @@ -75,11 +75,13 @@ inline void *__ompt_load_return_address(int gtid) {
return return_address;
}

#define OMPT_STORE_RETURN_ADDRESS(gtid) \
/*#define OMPT_STORE_RETURN_ADDRESS(gtid) \
if (ompt_enabled.enabled && gtid >= 0 && __kmp_threads[gtid] && \
!__kmp_threads[gtid]->th.ompt_thread_info.return_address) \
__kmp_threads[gtid]->th.ompt_thread_info.return_address = \
__builtin_return_address(0)
__builtin_return_address(0)*/
#define OMPT_STORE_RETURN_ADDRESS(gtid) \
OmptReturnAddressGuard ReturnAddressGuard{gtid, __builtin_return_address(0)};
#define OMPT_LOAD_RETURN_ADDRESS(gtid) __ompt_load_return_address(gtid)
#define OMPT_LOAD_OR_GET_RETURN_ADDRESS(gtid) \
((ompt_enabled.enabled && gtid >= 0 && __kmp_threads[gtid] && \
Expand Down Expand Up @@ -133,4 +135,23 @@ inline const char *ompt_get_runtime_version() {
#define OMPT_REDUCTION_END
#endif // ! OMPT_SUPPORT && OMPT_OPTIONAL

class OmptReturnAddressGuard {
private:
bool SetAddress{false};
int Gtid;

public:
OmptReturnAddressGuard(int Gtid, void *ReturnAddress) : Gtid(Gtid) {
if (ompt_enabled.enabled && Gtid >= 0 && __kmp_threads[Gtid] &&
!__kmp_threads[Gtid]->th.ompt_thread_info.return_address) {
SetAddress = true;
__kmp_threads[Gtid]->th.ompt_thread_info.return_address = ReturnAddress;
}
}
~OmptReturnAddressGuard() {
if (SetAddress)
__kmp_threads[Gtid]->th.ompt_thread_info.return_address = NULL;
}
};

#endif
102 changes: 102 additions & 0 deletions openmp/runtime/test/ompt/parallel/repeated_calls.c
@@ -0,0 +1,102 @@
// RUN: %libomp-compile-and-run | FileCheck %s
// REQUIRES: ompt

#define USE_PRIVATE_TOOL 1
#include "callback.h"

__attribute__((noinline))
int foo(int x) {
#pragma omp parallel num_threads(2)
{
#pragma omp atomic
x++;
}
return x;
}

__attribute__((noinline))
int bar(int x) {
#pragma omp parallel num_threads(2)
{
#pragma omp critical
x++;
}
return x;
}

int main() {
int y;
y = foo(y);
y = bar(y);
y = foo(y);
return 0;

// CHECK-NOT: {{^}}0: Could not register callback
// CHECK: 0: NULL_POINTER=[[NULL:.*$]]

// First call to foo
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin
// CHECK-SAME: {{.*}}codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]

// Call to bar
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin

// Second call to foo
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin
// CHECK-SAME: {{.*}}codeptr_ra=[[RETURN_ADDRESS]]

}

static void on_ompt_callback_thread_begin(
ompt_thread_t thread_type,
ompt_data_t *thread_data) {
if (thread_data->ptr)
printf("%s\n", "0: thread_data initially not null");
thread_data->value = ompt_get_unique_id();
printf("%" PRIu64 ":" _TOOL_PREFIX
" ompt_event_thread_begin: thread_type=%s=%d, thread_id=%" PRIu64 "\n",
ompt_get_thread_data()->value, ompt_thread_t_values[thread_type],
thread_type, thread_data->value);
}

static void on_ompt_callback_parallel_begin(
ompt_data_t *encountering_task_data,
const ompt_frame_t *encountering_task_frame, ompt_data_t *parallel_data,
uint32_t requested_team_size, int flag, const void *codeptr_ra) {
if (parallel_data->ptr)
printf("0: parallel_data initially not null\n");
parallel_data->value = ompt_get_unique_id();
int invoker = flag & 0xF;
const char *event = (flag & ompt_parallel_team) ? "parallel" : "teams";
const char *size = (flag & ompt_parallel_team) ? "team_size" : "num_teams";
printf("%" PRIu64 ":" _TOOL_PREFIX
" ompt_event_%s_begin: parent_task_id=%" PRIu64
", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, "
"parallel_id=%" PRIu64 ", requested_%s=%" PRIu32
", codeptr_ra=%p, invoker=%d\n",
ompt_get_thread_data()->value, event, encountering_task_data->value,
encountering_task_frame->exit_frame.ptr,
encountering_task_frame->enter_frame.ptr, parallel_data->value, size,
requested_team_size, codeptr_ra, invoker);
}

int ompt_initialize(ompt_function_lookup_t lookup, int initial_device_num,
ompt_data_t *tool_data) {
ompt_set_callback = (ompt_set_callback_t)lookup("ompt_set_callback");
ompt_get_unique_id = (ompt_get_unique_id_t)lookup("ompt_get_unique_id");
ompt_get_thread_data = (ompt_get_thread_data_t)lookup("ompt_get_thread_data");

register_callback(ompt_callback_thread_begin);
register_callback(ompt_callback_parallel_begin);
printf("0: NULL_POINTER=%p\n", (void *)NULL);
return 1; // success
}

void ompt_finalize(ompt_data_t *tool_data) {}

ompt_start_tool_result_t *ompt_start_tool(unsigned int omp_version,
const char *runtime_version) {
static ompt_start_tool_result_t ompt_start_tool_result = {&ompt_initialize,
&ompt_finalize, 0};
return &ompt_start_tool_result;
}

0 comments on commit 6d3b816

Please sign in to comment.