Skip to content

Commit

Permalink
[OpenMP] Add support for ompt_callback_dispatch
Browse files Browse the repository at this point in the history
This change adds support for ompt_callback_dispatch, including the new
dispatch chunk type introduced in OpenMP 5.2. Definitions of the new
ompt_work_loop_* work types were also added to the header file.

Differential Revision: https://reviews.llvm.org/D122107
  • Loading branch information
hansangbae committed Apr 6, 2022
1 parent 19e5235 commit e4ac11b
Show file tree
Hide file tree
Showing 12 changed files with 436 additions and 11 deletions.
20 changes: 16 additions & 4 deletions openmp/runtime/src/include/omp-tools.h.var
Expand Up @@ -266,7 +266,10 @@ typedef enum ompt_scope_endpoint_t {

/* Kind of work unit reported through ompt_callback_dispatch. The kind tells the
 * tool how to read the callback's `instance` argument: for the *_chunk kinds
 * the runtime stores a pointer to an ompt_dispatch_chunk_t in instance.ptr,
 * for ompt_dispatch_section it stores a code address in instance.ptr. */
typedef enum ompt_dispatch_t {
ompt_dispatch_iteration = 1,
ompt_dispatch_section = 2
ompt_dispatch_section = 2,
ompt_dispatch_ws_loop_chunk = 3,
ompt_dispatch_taskloop_chunk = 4,
ompt_dispatch_distribute_chunk = 5
} ompt_dispatch_t;

typedef enum ompt_sync_region_t {
Expand Down Expand Up @@ -303,7 +306,11 @@ typedef enum ompt_work_t {
ompt_work_workshare = 5,
ompt_work_distribute = 6,
ompt_work_taskloop = 7,
ompt_work_scope = 8
ompt_work_scope = 8,
ompt_work_loop_static = 10,
ompt_work_loop_dynamic = 11,
ompt_work_loop_guided = 12,
ompt_work_loop_other = 13
} ompt_work_t;

typedef enum ompt_mutex_t {
Expand Down Expand Up @@ -554,6 +561,11 @@ typedef struct ompt_dependence_t {
ompt_dependence_type_t dependence_type;
} ompt_dependence_t;

/* Describes one chunk of loop iterations handed out by the runtime. A pointer
 * to this structure is passed in the `instance` argument of the chunk-kind
 * dispatch callbacks. */
typedef struct ompt_dispatch_chunk_t {
/* Bound of the chunk with the smaller value: the lower bound for a positive
 * increment, the upper bound for a negative one. */
uint64_t start;
/* Number of iterations contained in the chunk. */
uint64_t iterations;
} ompt_dispatch_chunk_t;

typedef int (*ompt_enumerate_states_t) (
int current_state,
int *next_state,
Expand Down Expand Up @@ -745,7 +757,7 @@ typedef struct ompt_record_parallel_end_t {
} ompt_record_parallel_end_t;

typedef void (*ompt_callback_work_t) (
ompt_work_t wstype,
ompt_work_t work_type,
ompt_scope_endpoint_t endpoint,
ompt_data_t *parallel_data,
ompt_data_t *task_data,
Expand All @@ -754,7 +766,7 @@ typedef void (*ompt_callback_work_t) (
);

typedef struct ompt_record_work_t {
ompt_work_t wstype;
ompt_work_t work_type;
ompt_scope_endpoint_t endpoint;
ompt_id_t parallel_id;
ompt_id_t task_id;
Expand Down
15 changes: 15 additions & 0 deletions openmp/runtime/src/kmp_dispatch.cpp
Expand Up @@ -1964,9 +1964,22 @@ int __kmp_dispatch_next_algorithm(int gtid,
&(task_info->task_data), 0, codeptr); \
} \
}
// Report the chunk described by lb/ub/st through ompt_callback_dispatch as an
// ompt_dispatch_ws_loop_chunk. Nothing is reported when the callback is not
// registered or when status is zero.
// The chunk descriptor is a local of the expansion, so the pointer handed to
// the tool in instance.ptr is only valid for the duration of the callback.
// The body is wrapped in do { } while (0) so the macro acts as one statement
// (safe inside an unbraced if/else); invoke it with a trailing semicolon.
#define OMPT_LOOP_DISPATCH(lb, ub, st, status)                                 \
  do {                                                                         \
    if (ompt_enabled.ompt_callback_dispatch && (status)) {                     \
      ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);              \
      ompt_task_info_t *task_info = __ompt_get_task_info_object(0);            \
      ompt_dispatch_chunk_t chunk;                                             \
      ompt_data_t instance = ompt_data_none;                                   \
      OMPT_GET_DISPATCH_CHUNK(chunk, lb, ub, st);                              \
      instance.ptr = &chunk;                                                   \
      ompt_callbacks.ompt_callback(ompt_callback_dispatch)(                    \
          &(team_info->parallel_data), &(task_info->task_data),                \
          ompt_dispatch_ws_loop_chunk, instance);                              \
    }                                                                          \
  } while (0)
// TODO: implement count
#else
// Fallback (#else) definitions: both hooks expand to nothing.
// OMPT_LOOP_DISPATCH has to stay function-like here; an object-like definition
// would leave its call-site arguments behind as a live comma expression.
#define OMPT_LOOP_END // no-op
#define OMPT_LOOP_DISPATCH(lb, ub, st, status) // no-op
#endif

#if KMP_STATS_ENABLED
Expand Down Expand Up @@ -2142,6 +2155,7 @@ static int __kmp_dispatch_next(ident_t *loc, int gtid, kmp_int32 *p_last,
#if INCLUDE_SSC_MARKS
SSC_MARK_DISPATCH_NEXT();
#endif
OMPT_LOOP_DISPATCH(*p_lb, *p_ub, pr->u.p.st, status);
OMPT_LOOP_END;
KMP_STATS_LOOP_END;
return status;
Expand Down Expand Up @@ -2265,6 +2279,7 @@ static int __kmp_dispatch_next(ident_t *loc, int gtid, kmp_int32 *p_last,
#if INCLUDE_SSC_MARKS
SSC_MARK_DISPATCH_NEXT();
#endif
OMPT_LOOP_DISPATCH(*p_lb, *p_ub, pr->u.p.st, status);
OMPT_LOOP_END;
KMP_STATS_LOOP_END;
return status;
Expand Down
65 changes: 59 additions & 6 deletions openmp/runtime/src/kmp_sched.cpp
Expand Up @@ -101,7 +101,7 @@ static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,

static kmp_int8 warn = 0;

if (ompt_enabled.ompt_callback_work) {
if (ompt_enabled.ompt_callback_work || ompt_enabled.ompt_callback_dispatch) {
// Only fully initialize variables needed by OMPT if OMPT is enabled.
team_info = __ompt_get_teaminfo(0, NULL);
task_info = __ompt_get_task_info_object(0);
Expand Down Expand Up @@ -438,6 +438,24 @@ static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
&(task_info->task_data), trip_count, codeptr);
}
if (ompt_enabled.ompt_callback_dispatch) {
ompt_dispatch_t dispatch_type;
ompt_data_t instance = ompt_data_none;
ompt_dispatch_chunk_t dispatch_chunk;
if (ompt_work_type == ompt_work_sections) {
dispatch_type = ompt_dispatch_section;
instance.ptr = codeptr;
} else {
OMPT_GET_DISPATCH_CHUNK(dispatch_chunk, *plower, *pupper, incr);
dispatch_type = (ompt_work_type == ompt_work_distribute)
? ompt_dispatch_distribute_chunk
: ompt_dispatch_ws_loop_chunk;
instance.ptr = &dispatch_chunk;
}
ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
&(team_info->parallel_data), &(task_info->task_data), dispatch_type,
instance);
}
#endif

KMP_STATS_LOOP_END(OMP_loop_static_iterations);
Expand All @@ -450,7 +468,12 @@ static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
T *plower, T *pupper, T *pupperDist,
typename traits_t<T>::signed_t *pstride,
typename traits_t<T>::signed_t incr,
typename traits_t<T>::signed_t chunk) {
typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
,
void *codeptr
#endif
) {
KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
KMP_PUSH_PARTITIONED_TIMER(OMP_distribute);
KMP_PUSH_PARTITIONED_TIMER(OMP_distribute_scheduling);
Expand Down Expand Up @@ -682,6 +705,26 @@ end:;
}
#endif
KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.ompt_callback_work || ompt_enabled.ompt_callback_dispatch) {
ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
if (ompt_enabled.ompt_callback_work) {
ompt_callbacks.ompt_callback(ompt_callback_work)(
ompt_work_distribute, ompt_scope_begin, &(team_info->parallel_data),
&(task_info->task_data), 0, codeptr);
}
if (ompt_enabled.ompt_callback_dispatch) {
ompt_data_t instance = ompt_data_none;
ompt_dispatch_chunk_t dispatch_chunk;
OMPT_GET_DISPATCH_CHUNK(dispatch_chunk, *plower, *pupperDist, incr);
instance.ptr = &dispatch_chunk;
ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
&(team_info->parallel_data), &(task_info->task_data),
ompt_dispatch_distribute_chunk, instance);
}
}
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
KMP_STATS_LOOP_END(OMP_distribute_iterations);
return;
}
Expand Down Expand Up @@ -887,6 +930,12 @@ void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
@}
*/

// Optional trailing argument for calls to __kmp_dist_for_static_init. When
// OMPT_SUPPORT && OMPT_OPTIONAL holds, the callee takes an extra codeptr
// parameter, and this macro expands to ", <return address>"; otherwise it
// expands to nothing. Because the comma is part of the expansion, write it
// directly after the last regular argument without a separating comma.
#if OMPT_SUPPORT && OMPT_OPTIONAL
#define OMPT_CODEPTR_ARG , OMPT_GET_RETURN_ADDRESS(0)
#else
#define OMPT_CODEPTR_ARG
#endif

/*!
@ingroup WORK_SHARING
@param loc Source code location
Expand Down Expand Up @@ -915,7 +964,8 @@ void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid,
kmp_int32 *pupperD, kmp_int32 *pstride,
kmp_int32 incr, kmp_int32 chunk) {
__kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower,
pupper, pupperD, pstride, incr, chunk);
pupper, pupperD, pstride, incr,
chunk OMPT_CODEPTR_ARG);
}

/*!
Expand All @@ -927,7 +977,8 @@ void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
kmp_uint32 *pupperD, kmp_int32 *pstride,
kmp_int32 incr, kmp_int32 chunk) {
__kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower,
pupper, pupperD, pstride, incr, chunk);
pupper, pupperD, pstride, incr,
chunk OMPT_CODEPTR_ARG);
}

/*!
Expand All @@ -939,7 +990,8 @@ void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid,
kmp_int64 *pupperD, kmp_int64 *pstride,
kmp_int64 incr, kmp_int64 chunk) {
__kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower,
pupper, pupperD, pstride, incr, chunk);
pupper, pupperD, pstride, incr,
chunk OMPT_CODEPTR_ARG);
}

/*!
Expand All @@ -951,7 +1003,8 @@ void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
kmp_uint64 *pupperD, kmp_int64 *pstride,
kmp_int64 incr, kmp_int64 chunk) {
__kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower,
pupper, pupperD, pstride, incr, chunk);
pupper, pupperD, pstride, incr,
chunk OMPT_CODEPTR_ARG);
}
/*!
@}
Expand Down
18 changes: 18 additions & 0 deletions openmp/runtime/src/kmp_tasking.cpp
Expand Up @@ -1747,6 +1747,18 @@ static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task,
if (UNLIKELY(ompt_enabled.enabled))
__ompt_task_start(task, current_task, gtid);
#endif
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (UNLIKELY(ompt_enabled.ompt_callback_dispatch &&
taskdata->ompt_task_info.dispatch_chunk.iterations > 0)) {
ompt_data_t instance = ompt_data_none;
instance.ptr = &(taskdata->ompt_task_info.dispatch_chunk);
ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
&(team_info->parallel_data), &(taskdata->ompt_task_info.task_data),
ompt_dispatch_taskloop_chunk, instance);
taskdata->ompt_task_info.dispatch_chunk = {0, 0};
}
#endif // OMPT_SUPPORT && OMPT_OPTIONAL

#if OMPD_SUPPORT
if (ompd_state & OMPD_ENABLE_BP)
Expand Down Expand Up @@ -4643,6 +4655,12 @@ void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task,
#if OMPT_SUPPORT
__kmp_omp_taskloop_task(NULL, gtid, next_task,
codeptr_ra); // schedule new task
#if OMPT_OPTIONAL
if (ompt_enabled.ompt_callback_dispatch) {
OMPT_GET_DISPATCH_CHUNK(next_taskdata->ompt_task_info.dispatch_chunk,
lower, upper, st);
}
#endif // OMPT_OPTIONAL
#else
__kmp_omp_task(gtid, next_task, true); // schedule new task
#endif
Expand Down
2 changes: 1 addition & 1 deletion openmp/runtime/src/ompt-event-specific.h
Expand Up @@ -104,7 +104,7 @@

#define ompt_callback_reduction_implemented ompt_event_MAY_ALWAYS_OPTIONAL

#define ompt_callback_dispatch_implemented ompt_event_UNIMPLEMENTED
#define ompt_callback_dispatch_implemented ompt_event_MAY_ALWAYS_OPTIONAL

#define ompt_callback_error_implemented ompt_event_MAY_ALWAYS_OPTIONAL

Expand Down
1 change: 1 addition & 0 deletions openmp/runtime/src/ompt-internal.h
Expand Up @@ -57,6 +57,7 @@ typedef struct {
ompt_data_t task_data;
struct kmp_taskdata *scheduling_parent;
int thread_num;
ompt_dispatch_chunk_t dispatch_chunk = {0, 0};
} ompt_task_info_t;

typedef struct {
Expand Down
11 changes: 11 additions & 0 deletions openmp/runtime/src/ompt-specific.h
Expand Up @@ -89,6 +89,17 @@ inline void *__ompt_load_return_address(int gtid) {
? __ompt_load_return_address(gtid) \
: __builtin_return_address(0))

// Fill `chunk` (an lvalue with uint64_t members `start` and `iterations`) from
// the loop bounds lb/ub and the increment incr:
//   incr > 0 : start = lb, iterations = (ub - lb) / incr + 1
//   otherwise: start = ub, iterations = (lb - ub) / -incr + 1
// incr must be non-zero, since the second form divides by -incr.
// Every argument is parenthesized so that compound expressions (for example a
// conditional expression passed as incr) are evaluated as a unit. lb, ub and
// incr may be expanded more than once, so pass side-effect-free expressions.
#define OMPT_GET_DISPATCH_CHUNK(chunk, lb, ub, incr)                           \
  do {                                                                         \
    if ((incr) > 0) {                                                          \
      (chunk).start = static_cast<uint64_t>(lb);                               \
      (chunk).iterations =                                                     \
          static_cast<uint64_t>(((ub) - (lb)) / (incr) + 1);                   \
    } else {                                                                   \
      (chunk).start = static_cast<uint64_t>(ub);                               \
      (chunk).iterations =                                                     \
          static_cast<uint64_t>(((lb) - (ub)) / -(incr) + 1);                  \
    }                                                                          \
  } while (0)

//******************************************************************************
// inline functions
//******************************************************************************
Expand Down
48 changes: 48 additions & 0 deletions openmp/runtime/test/ompt/callback.h
Expand Up @@ -790,6 +790,12 @@ on_ompt_callback_work(
switch(wstype)
{
case ompt_work_loop:
case ompt_work_loop_static:
case ompt_work_loop_dynamic:
case ompt_work_loop_guided:
case ompt_work_loop_other:
// TODO: add schedule attribute for the different work_loop types.
// e.g., ", schedule=%s", ..., ompt_schedule_values[wstype]
printf("%" PRIu64 ":" _TOOL_PREFIX
" ompt_event_loop_begin: parallel_id=%" PRIu64
", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
Expand Down Expand Up @@ -854,6 +860,10 @@ on_ompt_callback_work(
switch(wstype)
{
case ompt_work_loop:
case ompt_work_loop_static:
case ompt_work_loop_dynamic:
case ompt_work_loop_guided:
case ompt_work_loop_other:
printf("%" PRIu64 ":" _TOOL_PREFIX
" ompt_event_loop_end: parallel_id=%" PRIu64
", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
Expand Down Expand Up @@ -917,6 +927,43 @@ on_ompt_callback_work(
}
}

// Test-tool handler for ompt_callback_dispatch: prints one line per dispatched
// work unit.
//   parallel_data / task_data: tool data of the enclosing parallel region and
//                              task; their `value` fields are printed as ids.
//   kind:     selects the printed event name and how `instance` is read.
//   instance: for ompt_dispatch_section, instance.ptr is printed as codeptr_ra;
//             for the *_chunk kinds, instance.ptr is read as a pointer to an
//             ompt_dispatch_chunk_t whose start/iterations are printed.
// Fields that do not apply to the given kind are printed as NULL / 0.
static void on_ompt_callback_dispatch(
    ompt_data_t *parallel_data,
    ompt_data_t *task_data,
    ompt_dispatch_t kind,
    ompt_data_t instance) {
  // Points at string literals only, hence const.
  const char *event_name = NULL;
  void *codeptr_ra = NULL;
  ompt_dispatch_chunk_t *dispatch_chunk = NULL;
  switch (kind) {
  case ompt_dispatch_section:
    event_name = "ompt_event_section_begin";
    codeptr_ra = instance.ptr;
    break;
  case ompt_dispatch_ws_loop_chunk:
    event_name = "ompt_event_ws_loop_chunk_begin";
    dispatch_chunk = (ompt_dispatch_chunk_t *)instance.ptr;
    break;
  case ompt_dispatch_taskloop_chunk:
    event_name = "ompt_event_taskloop_chunk_begin";
    dispatch_chunk = (ompt_dispatch_chunk_t *)instance.ptr;
    break;
  case ompt_dispatch_distribute_chunk:
    event_name = "ompt_event_distribute_chunk_begin";
    dispatch_chunk = (ompt_dispatch_chunk_t *)instance.ptr;
    break;
  default:
    // Any other kind (including ompt_dispatch_iteration) is reported under
    // this name, with no code pointer and no chunk.
    event_name = "ompt_ws_loop_iteration_begin";
    break;
  }
  printf("%" PRIu64 ":" _TOOL_PREFIX
         " %s: parallel_id=%" PRIu64 ", task_id=%" PRIu64
         ", codeptr_ra=%p, chunk_start=%" PRIu64 ", chunk_iterations=%" PRIu64
         "\n", ompt_get_thread_data()->value, event_name, parallel_data->value,
         task_data->value, codeptr_ra,
         dispatch_chunk ? dispatch_chunk->start : 0,
         dispatch_chunk ? dispatch_chunk->iterations : 0);
}

static void on_ompt_callback_masked(ompt_scope_endpoint_t endpoint,
ompt_data_t *parallel_data,
ompt_data_t *task_data,
Expand Down Expand Up @@ -1178,6 +1225,7 @@ int ompt_initialize(
register_ompt_callback_t(ompt_callback_lock_init, ompt_callback_mutex_acquire_t);
register_ompt_callback_t(ompt_callback_lock_destroy, ompt_callback_mutex_t);
register_ompt_callback(ompt_callback_work);
register_ompt_callback(ompt_callback_dispatch);
register_ompt_callback(ompt_callback_masked);
register_ompt_callback(ompt_callback_parallel_begin);
register_ompt_callback(ompt_callback_parallel_end);
Expand Down

0 comments on commit e4ac11b

Please sign in to comment.