Skip to content

Commit

Permalink
Update stats-gathering code
Browse files Browse the repository at this point in the history
Have developer timers use the partitioning scheme, which also required that some
redundant developer timers be removed in favor of the already existing normal
timers. Move per-thread stats initialization to just after global thread id
assignment, which is as early as possible. Also put all global stats
initialization code in __kmp_stats_init() and all global stats destruction code
in __kmp_stats_fini().

Differential Revision: https://reviews.llvm.org/D26361

llvm-svn: 286892
  • Loading branch information
jpeyton52 committed Nov 14, 2016
1 parent f10a871 commit 5375fe8
Show file tree
Hide file tree
Showing 6 changed files with 102 additions and 117 deletions.
36 changes: 17 additions & 19 deletions openmp/runtime/src/kmp_barrier.cpp
Expand Up @@ -50,7 +50,7 @@ __kmp_linear_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid
void (*reduce)(void *, void *)
USE_ITT_BUILD_ARG(void * itt_sync_obj) )
{
KMP_TIME_DEVELOPER_BLOCK(KMP_linear_gather);
KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_gather);
register kmp_team_t *team = this_thr->th.th_team;
register kmp_bstate_t *thr_bar = & this_thr->th.th_bar[bt].bb;
register kmp_info_t **other_threads = team->t.t_threads;
Expand Down Expand Up @@ -130,7 +130,7 @@ __kmp_linear_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gti
int propagate_icvs
USE_ITT_BUILD_ARG(void *itt_sync_obj) )
{
KMP_TIME_DEVELOPER_BLOCK(KMP_linear_release);
KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_release);
register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
register kmp_team_t *team;

Expand All @@ -149,7 +149,7 @@ __kmp_linear_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gti
if (nproc > 1) {
#if KMP_BARRIER_ICV_PUSH
{
KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy);
KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
if (propagate_icvs) {
ngo_load(&team->t.t_implicit_task_taskdata[0].td_icvs);
for (i=1; i<nproc; ++i) {
Expand Down Expand Up @@ -225,7 +225,7 @@ __kmp_tree_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
void (*reduce)(void *, void *)
USE_ITT_BUILD_ARG(void *itt_sync_obj) )
{
KMP_TIME_DEVELOPER_BLOCK(KMP_tree_gather);
KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_gather);
register kmp_team_t *team = this_thr->th.th_team;
register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
register kmp_info_t **other_threads = team->t.t_threads;
Expand Down Expand Up @@ -323,7 +323,7 @@ __kmp_tree_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
int propagate_icvs
USE_ITT_BUILD_ARG(void *itt_sync_obj) )
{
KMP_TIME_DEVELOPER_BLOCK(KMP_tree_release);
KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_release);
register kmp_team_t *team;
register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
register kmp_uint32 nproc;
Expand Down Expand Up @@ -393,7 +393,7 @@ __kmp_tree_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid,

#if KMP_BARRIER_ICV_PUSH
{
KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy);
KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
if (propagate_icvs) {
__kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[child_tid],
team, child_tid, FALSE);
Expand Down Expand Up @@ -426,7 +426,7 @@ __kmp_hyper_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
void (*reduce)(void *, void *)
USE_ITT_BUILD_ARG(void *itt_sync_obj) )
{
KMP_TIME_DEVELOPER_BLOCK(KMP_hyper_gather);
KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_gather);
register kmp_team_t *team = this_thr->th.th_team;
register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
register kmp_info_t **other_threads = team->t.t_threads;
Expand Down Expand Up @@ -535,7 +535,7 @@ __kmp_hyper_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid
int propagate_icvs
USE_ITT_BUILD_ARG(void *itt_sync_obj) )
{
KMP_TIME_DEVELOPER_BLOCK(KMP_hyper_release);
KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_release);
register kmp_team_t *team;
register kmp_bstate_t *thr_bar = & this_thr -> th.th_bar[ bt ].bb;
register kmp_info_t **other_threads;
Expand Down Expand Up @@ -742,7 +742,7 @@ __kmp_hierarchical_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr,
int gtid, int tid, void (*reduce) (void *, void *)
USE_ITT_BUILD_ARG(void * itt_sync_obj) )
{
KMP_TIME_DEVELOPER_BLOCK(KMP_hier_gather);
KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_gather);
register kmp_team_t *team = this_thr->th.th_team;
register kmp_bstate_t *thr_bar = & this_thr->th.th_bar[bt].bb;
register kmp_uint32 nproc = this_thr->th.th_team_nproc;
Expand Down Expand Up @@ -883,7 +883,7 @@ __kmp_hierarchical_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, i
int propagate_icvs
USE_ITT_BUILD_ARG(void * itt_sync_obj) )
{
KMP_TIME_DEVELOPER_BLOCK(KMP_hier_release);
KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_release);
register kmp_team_t *team;
register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
register kmp_uint32 nproc;
Expand Down Expand Up @@ -1067,9 +1067,8 @@ int
__kmp_barrier(enum barrier_type bt, int gtid, int is_split, size_t reduce_size,
void *reduce_data, void (*reduce)(void *, void *))
{
KMP_TIME_DEVELOPER_BLOCK(KMP_barrier);
KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER);
KMP_TIME_PARTITIONED_BLOCK(OMP_plain_barrier);
KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER);
register int tid = __kmp_tid_from_gtid(gtid);
register kmp_info_t *this_thr = __kmp_threads[gtid];
register kmp_team_t *team = this_thr->th.th_team;
Expand Down Expand Up @@ -1333,7 +1332,8 @@ __kmp_barrier(enum barrier_type bt, int gtid, int is_split, size_t reduce_size,
void
__kmp_end_split_barrier(enum barrier_type bt, int gtid)
{
KMP_TIME_DEVELOPER_BLOCK(KMP_end_split_barrier);
KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_end_split_barrier);
KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER);
int tid = __kmp_tid_from_gtid(gtid);
kmp_info_t *this_thr = __kmp_threads[gtid];
kmp_team_t *team = this_thr->th.th_team;
Expand Down Expand Up @@ -1376,9 +1376,8 @@ __kmp_end_split_barrier(enum barrier_type bt, int gtid)
void
__kmp_join_barrier(int gtid)
{
KMP_TIME_PARTITIONED_BLOCK(OMP_fork_join_barrier);
KMP_TIME_PARTITIONED_BLOCK(OMP_join_barrier);
KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER);
KMP_TIME_DEVELOPER_BLOCK(KMP_join_barrier);
register kmp_info_t *this_thr = __kmp_threads[gtid];
register kmp_team_t *team;
register kmp_uint nproc;
Expand Down Expand Up @@ -1592,9 +1591,8 @@ __kmp_join_barrier(int gtid)
void
__kmp_fork_barrier(int gtid, int tid)
{
KMP_TIME_PARTITIONED_BLOCK(OMP_fork_join_barrier);
KMP_TIME_PARTITIONED_BLOCK(OMP_fork_barrier);
KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER);
KMP_TIME_DEVELOPER_BLOCK(KMP_fork_barrier);
kmp_info_t *this_thr = __kmp_threads[gtid];
kmp_team_t *team = (tid == 0) ? this_thr->th.th_team : NULL;
#if USE_ITT_BUILD
Expand Down Expand Up @@ -1707,7 +1705,7 @@ __kmp_fork_barrier(int gtid, int tid)
the fixed ICVs in the master's thread struct, because it is not always the case that the
threads arrays have been allocated when __kmp_fork_call() is executed. */
{
KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy);
KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
if (!KMP_MASTER_TID(tid)) { // master thread already has ICVs
// Copy the initial ICVs from the master's thread struct to the implicit task for this tid.
KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n", gtid, tid));
Expand Down Expand Up @@ -1762,7 +1760,7 @@ __kmp_fork_barrier(int gtid, int tid)
void
__kmp_setup_icv_copy(kmp_team_t *team, int new_nproc, kmp_internal_control_t *new_icvs, ident_t *loc )
{
KMP_TIME_DEVELOPER_BLOCK(KMP_setup_icv_copy);
KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_setup_icv_copy);

KMP_DEBUG_ASSERT(team && new_nproc && new_icvs);
KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);
Expand Down
4 changes: 2 additions & 2 deletions openmp/runtime/src/kmp_global.c
Expand Up @@ -28,10 +28,10 @@ kmp_cpuinfo_t __kmp_cpuinfo = { 0 }; // Not initialized
kmp_tas_lock_t __kmp_stats_lock;

// global list of per thread stats, the head is a sentinel node which accumulates all stats produced before __kmp_create_worker is called.
kmp_stats_list __kmp_stats_list;
kmp_stats_list* __kmp_stats_list;

// thread local pointer to stats node within list
__thread kmp_stats_list* __kmp_stats_thread_ptr = &__kmp_stats_list;
__thread kmp_stats_list* __kmp_stats_thread_ptr = NULL;

// gives reference tick for all events (considered the 0 tick)
tsc_tick_count __kmp_stats_start_time;
Expand Down
22 changes: 12 additions & 10 deletions openmp/runtime/src/kmp_runtime.c
Expand Up @@ -1417,7 +1417,7 @@ __kmp_fork_call(
kmp_hot_team_ptr_t **p_hot_teams;
#endif
{ // KMP_TIME_BLOCK
KMP_TIME_DEVELOPER_BLOCK(KMP_fork_call);
KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);
KMP_COUNT_VALUE(OMP_PARALLEL_args, argc);

KA_TRACE( 20, ("__kmp_fork_call: enter T#%d\n", gtid ));
Expand Down Expand Up @@ -2199,7 +2199,6 @@ __kmp_fork_call(
{
KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
// KMP_TIME_DEVELOPER_BLOCK(USER_master_invoke);
if (! team->t.t_invoke( gtid )) {
KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
}
Expand Down Expand Up @@ -2258,7 +2257,7 @@ __kmp_join_call(ident_t *loc, int gtid
#endif /* OMP_40_ENABLED */
)
{
KMP_TIME_DEVELOPER_BLOCK(KMP_join_call);
KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
kmp_team_t *team;
kmp_team_t *parent_team;
kmp_info_t *master_th;
Expand Down Expand Up @@ -3681,6 +3680,13 @@ __kmp_register_root( int initial_thread )
KMP_DEBUG_ASSERT( ! root->r.r_root_team );
}

#if KMP_STATS_ENABLED
// Initialize stats as soon as possible (right after gtid assignment).
__kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
KMP_START_EXPLICIT_TIMER(OMP_worker_thread_life);
KMP_SET_THREAD_STATE(SERIAL_REGION);
KMP_INIT_PARTITIONED_TIMERS(OMP_serial);
#endif
__kmp_initialize_root( root );

/* setup new root thread structure */
Expand Down Expand Up @@ -4748,7 +4754,7 @@ __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
kmp_internal_control_t *new_icvs,
int argc USE_NESTED_HOT_ARG(kmp_info_t *master) )
{
KMP_TIME_DEVELOPER_BLOCK(KMP_allocate_team);
KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
int f;
kmp_team_t *team;
int use_hot_team = ! root->r.r_active;
Expand Down Expand Up @@ -5504,14 +5510,11 @@ __kmp_launch_thread( kmp_info_t *this_thr )
}
#endif

KMP_STOP_DEVELOPER_EXPLICIT_TIMER(USER_launch_thread_loop);
{
KMP_TIME_DEVELOPER_BLOCK(USER_worker_invoke);
KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
rc = (*pteam)->t.t_invoke( gtid );
}
KMP_START_DEVELOPER_EXPLICIT_TIMER(USER_launch_thread_loop);
KMP_ASSERT( rc );

#if OMPT_SUPPORT
Expand Down Expand Up @@ -6332,7 +6335,7 @@ __kmp_do_serial_initialize( void )
#endif
#endif
#if KMP_STATS_ENABLED
__kmp_init_tas_lock( & __kmp_stats_lock );
__kmp_stats_init();
#endif
__kmp_init_lock( & __kmp_global_lock );
__kmp_init_queuing_lock( & __kmp_dispatch_lock );
Expand Down Expand Up @@ -7293,8 +7296,7 @@ __kmp_cleanup( void )
__kmp_i18n_catclose();

#if KMP_STATS_ENABLED
__kmp_accumulate_stats_at_exit();
__kmp_stats_list.deallocate();
__kmp_stats_fini();
#endif

KA_TRACE( 10, ("__kmp_cleanup: exit\n" ) );
Expand Down
38 changes: 26 additions & 12 deletions openmp/runtime/src/kmp_stats.cpp
Expand Up @@ -29,11 +29,11 @@
#define expandName(name,flags,ignore) {STRINGIZE(name),flags},
statInfo timeStat::timerInfo[] = {
KMP_FOREACH_TIMER(expandName,0)
{0,0}
{"TIMER_LAST", 0}
};
const statInfo counter::counterInfo[] = {
KMP_FOREACH_COUNTER(expandName,0)
{0,0}
{"COUNTER_LAST", 0}
};
#undef expandName

Expand Down Expand Up @@ -71,7 +71,7 @@ const kmp_stats_output_module::rgb_color kmp_stats_output_module::globalColorArr
static uint32_t statsPrinted = 0;

// output interface
static kmp_stats_output_module __kmp_stats_global_output;
static kmp_stats_output_module* __kmp_stats_global_output = NULL;

/* ****************************************************** */
/* ************* statistic member functions ************* */
Expand Down Expand Up @@ -164,7 +164,7 @@ void explicitTimer::start(timer_e timerEnumValue) {
return;
}

void explicitTimer::stop(timer_e timerEnumValue) {
void explicitTimer::stop(timer_e timerEnumValue, kmp_stats_list* stats_ptr /* = nullptr */) {
if (startTime.getValue() == 0)
return;

Expand All @@ -174,8 +174,10 @@ void explicitTimer::stop(timer_e timerEnumValue) {
stat->addSample(((finishTime - startTime) - totalPauseTime).ticks());

if(timeStat::logEvent(timerEnumValue)) {
__kmp_stats_thread_ptr->push_event(startTime.getValue() - __kmp_stats_start_time.getValue(), finishTime.getValue() - __kmp_stats_start_time.getValue(), __kmp_stats_thread_ptr->getNestValue(), timerEnumValue);
__kmp_stats_thread_ptr->decrementNestValue();
if(!stats_ptr)
stats_ptr = __kmp_stats_thread_ptr;
stats_ptr->push_event(startTime.getValue() - __kmp_stats_start_time.getValue(), finishTime.getValue() - __kmp_stats_start_time.getValue(), __kmp_stats_thread_ptr->getNestValue(), timerEnumValue);
stats_ptr->decrementNestValue();
}

/* We accept the risk that we drop a sample because it really did start at t==0. */
Expand Down Expand Up @@ -481,18 +483,18 @@ void kmp_stats_output_module::windupExplicitTimers()
// and say "it's over".
// If the timer wasn't running, this won't record anything anyway.
kmp_stats_list::iterator it;
for(it = __kmp_stats_list.begin(); it != __kmp_stats_list.end(); it++) {
for(it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) {
kmp_stats_list* ptr = *it;
ptr->getPartitionedTimers()->windup();
for (int timer=0; timer<EXPLICIT_TIMER_LAST; timer++) {
ptr->getExplicitTimer(explicit_timer_e(timer))->stop((timer_e)timer);
ptr->getExplicitTimer(explicit_timer_e(timer))->stop((timer_e)timer, ptr);
}
}
}

void kmp_stats_output_module::printPloticusFile() {
int i;
int size = __kmp_stats_list.size();
int size = __kmp_stats_list->size();
FILE* plotOut = fopen(plotFileName, "w+");

fprintf(plotOut, "#proc page\n"
Expand Down Expand Up @@ -602,7 +604,7 @@ void kmp_stats_output_module::outputStats(const char* heading)
fprintf(statsOut, "%s\n",heading);
// Accumulate across threads.
kmp_stats_list::iterator it;
for (it = __kmp_stats_list.begin(); it != __kmp_stats_list.end(); it++) {
for (it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) {
int t = (*it)->getGtid();
// Output per thread stats if requested.
if (printPerThreadFlag) {
Expand Down Expand Up @@ -666,7 +668,7 @@ extern "C" {
void __kmp_reset_stats()
{
kmp_stats_list::iterator it;
for(it = __kmp_stats_list.begin(); it != __kmp_stats_list.end(); it++) {
for(it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) {
timeStat * timers = (*it)->getTimers();
counter * counters = (*it)->getCounters();
explicitTimer * eTimers = (*it)->getExplicitTimers();
Expand All @@ -688,7 +690,7 @@ void __kmp_reset_stats()
// This function will reset all stats and stop all threads' explicit timers if they haven't been stopped already.
void __kmp_output_stats(const char * heading)
{
__kmp_stats_global_output.outputStats(heading);
__kmp_stats_global_output->outputStats(heading);
__kmp_reset_stats();
}

Expand All @@ -703,6 +705,18 @@ void __kmp_accumulate_stats_at_exit(void)

// Global stats start-up: initializes the stats lock, records the reference
// tick, and heap-allocates the global output module and the global stats list.
// Other stats code dereferences these pointers (for example
// __kmp_stats_list->push_back(gtid) during root registration), so this has to
// run before any of it.
void __kmp_stats_init(void)
{
__kmp_init_tas_lock( & __kmp_stats_lock );
// Reference ("0") tick; event timestamps are reported relative to it.
__kmp_stats_start_time = tsc_tick_count::now();
// Both objects below are released again in __kmp_stats_fini().
__kmp_stats_global_output = new kmp_stats_output_module();
__kmp_stats_list = new kmp_stats_list();
}

// Global stats tear-down, the counterpart of __kmp_stats_init(): runs the
// at-exit accumulation, calls deallocate() on the global stats list, then frees
// the two heap objects allocated by __kmp_stats_init() and clears the globals.
void __kmp_stats_fini(void)
{
    // NOTE(review): presumably needs the list and output module to still be
    // alive, so it is kept ahead of the deletes -- confirm against its body.
    __kmp_accumulate_stats_at_exit();
    __kmp_stats_list->deallocate();

    // Reset each global right after freeing it so no dangling pointer is left
    // behind; a later __kmp_stats_init() simply assigns fresh objects.
    delete __kmp_stats_global_output;
    __kmp_stats_global_output = NULL;
    delete __kmp_stats_list;
    __kmp_stats_list = NULL;
}

} // extern "C"
Expand Down

0 comments on commit 5375fe8

Please sign in to comment.