Skip to content

Commit

Permalink
[STATS] Use partitioned timer scheme
Browse files Browse the repository at this point in the history
This change removes the current timers with ones that partition time properly.
The current timers are nested, so that if a new timer, B, starts when the
current timer, A, is already timing, A's time will include B's. To eliminate
this problem, the partitioned timers are designed to stop the current timer (A),
let the new timer run (B), and when the new timer is finished, restart the
previously running timer (A). With this partitioning of time, a threads' timers
all sum up to the OMP_worker_thread_life time and can now easily show the
percentage of time a thread is spending in different parts of the runtime or
user code.

There is also a new state variable associated with each thread which tells where
it is executing a task. This corresponds with the timers: OMP_task_*, e.g., if
time is spent in OMP_task_taskwait, then that thread executed tasks inside a
#pragma omp taskwait construct.

The changes are mostly changing the MACROs to use the new PARITIONED_* macros,
the new partitionedTimers class and its methods, and new state logic.

Differential Revision: http://reviews.llvm.org/D19229

llvm-svn: 268640
  • Loading branch information
jpeyton52 committed May 5, 2016
1 parent 3e0b783 commit 11dc82f
Show file tree
Hide file tree
Showing 11 changed files with 369 additions and 68 deletions.
18 changes: 18 additions & 0 deletions openmp/runtime/src/kmp_barrier.cpp
Expand Up @@ -1048,6 +1048,8 @@ __kmp_barrier(enum barrier_type bt, int gtid, int is_split, size_t reduce_size,
void *reduce_data, void (*reduce)(void *, void *))
{
KMP_TIME_DEVELOPER_BLOCK(KMP_barrier);
KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER);
KMP_TIME_PARTITIONED_BLOCK(OMP_plain_barrier);
register int tid = __kmp_tid_from_gtid(gtid);
register kmp_info_t *this_thr = __kmp_threads[gtid];
register kmp_team_t *team = this_thr->th.th_team;
Expand Down Expand Up @@ -1348,6 +1350,8 @@ __kmp_end_split_barrier(enum barrier_type bt, int gtid)
void
__kmp_join_barrier(int gtid)
{
KMP_TIME_PARTITIONED_BLOCK(OMP_fork_join_barrier);
KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER);
KMP_TIME_DEVELOPER_BLOCK(KMP_join_barrier);
register kmp_info_t *this_thr = __kmp_threads[gtid];
register kmp_team_t *team;
Expand Down Expand Up @@ -1463,6 +1467,18 @@ __kmp_join_barrier(int gtid)
__kmp_task_team_wait(this_thr, team
USE_ITT_BUILD_ARG(itt_sync_obj) );
}
#if KMP_STATS_ENABLED
// Have master thread flag the workers to indicate they are now waiting for
// next parallel region, Also wake them up so they switch their timers to idle.
for (int i=0; i<team->t.t_nproc; ++i) {
kmp_info_t* team_thread = team->t.t_threads[i];
if (team_thread == this_thr)
continue;
team_thread->th.th_stats->setIdleFlag();
if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME && team_thread->th.th_sleep_loc != NULL)
__kmp_null_resume_wrapper(__kmp_gtid_from_thread(team_thread), team_thread->th.th_sleep_loc);
}
#endif
#if USE_ITT_BUILD
if (__itt_sync_create_ptr || KMP_ITT_DEBUG)
__kmp_itt_barrier_middle(gtid, itt_sync_obj);
Expand Down Expand Up @@ -1546,6 +1562,8 @@ __kmp_join_barrier(int gtid)
void
__kmp_fork_barrier(int gtid, int tid)
{
KMP_TIME_PARTITIONED_BLOCK(OMP_fork_join_barrier);
KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER);
KMP_TIME_DEVELOPER_BLOCK(KMP_fork_barrier);
kmp_info_t *this_thr = __kmp_threads[gtid];
kmp_team_t *team = (tid == 0) ? this_thr->th.th_team : NULL;
Expand Down
19 changes: 7 additions & 12 deletions openmp/runtime/src/kmp_csupport.c
Expand Up @@ -290,7 +290,6 @@ __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
}
else
{
KMP_STOP_EXPLICIT_TIMER(OMP_serial);
KMP_COUNT_BLOCK(OMP_PARALLEL);
}
#endif
Expand Down Expand Up @@ -345,10 +344,6 @@ __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...)
}
#endif
}
#if (KMP_STATS_ENABLED)
if (!inParallel)
KMP_START_EXPLICIT_TIMER(OMP_serial);
#endif
}

#if OMP_40_ENABLED
Expand Down Expand Up @@ -669,7 +664,6 @@ void
__kmpc_barrier(ident_t *loc, kmp_int32 global_tid)
{
KMP_COUNT_BLOCK(OMP_BARRIER);
KMP_TIME_BLOCK(OMP_barrier);
KC_TRACE( 10, ("__kmpc_barrier: called T#%d\n", global_tid ) );

if (! TCR_4(__kmp_init_parallel))
Expand Down Expand Up @@ -713,7 +707,7 @@ __kmpc_master(ident_t *loc, kmp_int32 global_tid)

if( KMP_MASTER_GTID( global_tid )) {
KMP_COUNT_BLOCK(OMP_MASTER);
KMP_START_EXPLICIT_TIMER(OMP_master);
KMP_PUSH_PARTITIONED_TIMER(OMP_master);
status = 1;
}

Expand Down Expand Up @@ -763,7 +757,7 @@ __kmpc_end_master(ident_t *loc, kmp_int32 global_tid)
KC_TRACE( 10, ("__kmpc_end_master: called T#%d\n", global_tid ) );

KMP_DEBUG_ASSERT( KMP_MASTER_GTID( global_tid ));
KMP_STOP_EXPLICIT_TIMER(OMP_master);
KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_TRACE
kmp_info_t *this_thr = __kmp_threads[ global_tid ];
Expand Down Expand Up @@ -1088,7 +1082,7 @@ __kmpc_critical( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit )
__kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
#else
KMP_COUNT_BLOCK(OMP_CRITICAL);
KMP_TIME_BLOCK(OMP_critical_wait); /* Time spent waiting to enter the critical section */
KMP_TIME_PARTITIONED_BLOCK(OMP_critical_wait); /* Time spent waiting to enter the critical section */
kmp_user_lock_p lck;

KC_TRACE( 10, ("__kmpc_critical: called T#%d\n", global_tid ) );
Expand Down Expand Up @@ -1250,6 +1244,7 @@ __kmpc_critical_with_hint( ident_t * loc, kmp_int32 global_tid, kmp_critical_nam
__kmp_itt_critical_acquired( lck );
#endif /* USE_ITT_BUILD */

KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
KA_TRACE( 15, ("__kmpc_critical: done T#%d\n", global_tid ));
} // __kmpc_critical_with_hint

Expand Down Expand Up @@ -1342,7 +1337,7 @@ __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *crit)
#endif

#endif // KMP_USE_DYNAMIC_LOCK
KMP_STOP_EXPLICIT_TIMER(OMP_critical);
KMP_POP_PARTITIONED_TIMER();
KA_TRACE( 15, ("__kmpc_end_critical: done T#%d\n", global_tid ));
}

Expand Down Expand Up @@ -1464,7 +1459,7 @@ __kmpc_single(ident_t *loc, kmp_int32 global_tid)
if (rc) {
// We are going to execute the single statement, so we should count it.
KMP_COUNT_BLOCK(OMP_SINGLE);
KMP_START_EXPLICIT_TIMER(OMP_single);
KMP_PUSH_PARTITIONED_TIMER(OMP_single);
}

#if OMPT_SUPPORT && OMPT_TRACE
Expand Down Expand Up @@ -1507,7 +1502,7 @@ void
__kmpc_end_single(ident_t *loc, kmp_int32 global_tid)
{
__kmp_exit_single( global_tid );
KMP_STOP_EXPLICIT_TIMER(OMP_single);
KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_TRACE
kmp_info_t *this_thr = __kmp_threads[ global_tid ];
Expand Down
2 changes: 1 addition & 1 deletion openmp/runtime/src/kmp_dispatch.cpp
Expand Up @@ -1424,7 +1424,7 @@ __kmp_dispatch_next(
// This is potentially slightly misleading, schedule(runtime) will appear here even if the actual runtme schedule
// is static. (Which points out a disadavantage of schedule(runtime): even when static scheduling is used it costs
// more than a compile time choice to use static scheduling would.)
KMP_TIME_BLOCK(FOR_dynamic_scheduling);
KMP_TIME_PARTITIONED_BLOCK(FOR_dynamic_scheduling);

int status;
dispatch_private_info_template< T > * pr;
Expand Down
23 changes: 16 additions & 7 deletions openmp/runtime/src/kmp_runtime.c
Expand Up @@ -1543,7 +1543,8 @@ __kmp_fork_call(
#endif

{
KMP_TIME_BLOCK(OMP_work);
KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
__kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv
#if OMPT_SUPPORT
, exit_runtime_p
Expand Down Expand Up @@ -1618,7 +1619,8 @@ __kmp_fork_call(
gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) );

{
KMP_TIME_BLOCK(OMP_work);
KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
if (! parent_team->t.t_invoke( gtid )) {
KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
}
Expand Down Expand Up @@ -1738,7 +1740,8 @@ __kmp_fork_call(
#endif

{
KMP_TIME_BLOCK(OMP_work);
KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
__kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv
#if OMPT_SUPPORT
, exit_runtime_p
Expand Down Expand Up @@ -1795,7 +1798,8 @@ __kmp_fork_call(
team->t.t_level--;
// AC: call special invoker for outer "parallel" of the teams construct
{
KMP_TIME_BLOCK(OMP_work);
KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
invoker(gtid);
}
} else {
Expand Down Expand Up @@ -1842,7 +1846,8 @@ __kmp_fork_call(
#endif

{
KMP_TIME_BLOCK(OMP_work);
KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
__kmp_invoke_microtask( microtask, gtid, 0, argc, args
#if OMPT_SUPPORT
, exit_runtime_p
Expand Down Expand Up @@ -2178,7 +2183,8 @@ __kmp_fork_call(
} // END of timer KMP_fork_call block

{
KMP_TIME_BLOCK(OMP_work);
KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
// KMP_TIME_DEVELOPER_BLOCK(USER_master_invoke);
if (! team->t.t_invoke( gtid )) {
KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
Expand Down Expand Up @@ -5448,6 +5454,8 @@ __kmp_launch_thread( kmp_info_t *this_thr )
KMP_STOP_DEVELOPER_EXPLICIT_TIMER(USER_launch_thread_loop);
{
KMP_TIME_DEVELOPER_BLOCK(USER_worker_invoke);
KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
rc = (*pteam)->t.t_invoke( gtid );
}
KMP_START_DEVELOPER_EXPLICIT_TIMER(USER_launch_thread_loop);
Expand Down Expand Up @@ -6783,7 +6791,8 @@ __kmp_invoke_task_func( int gtid )
#endif

{
KMP_TIME_BLOCK(OMP_work);
KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
rc = __kmp_invoke_microtask( (microtask_t) TCR_SYNC_PTR(team->t.t_pkfn),
gtid, tid, (int) team->t.t_argc, (void **) team->t.t_argv
#if OMPT_SUPPORT
Expand Down
2 changes: 1 addition & 1 deletion openmp/runtime/src/kmp_sched.cpp
Expand Up @@ -84,7 +84,7 @@ __kmp_for_static_init(
typename traits_t< T >::signed_t chunk
) {
KMP_COUNT_BLOCK(OMP_FOR_static);
KMP_TIME_BLOCK (FOR_static_scheduling);
KMP_TIME_PARTITIONED_BLOCK(FOR_static_scheduling);

typedef typename traits_t< T >::unsigned_t UT;
typedef typename traits_t< T >::signed_t ST;
Expand Down
79 changes: 73 additions & 6 deletions openmp/runtime/src/kmp_stats.cpp
Expand Up @@ -157,6 +157,7 @@ std::string statistic::format(char unit, bool total) const

void explicitTimer::start(timer_e timerEnumValue) {
startTime = tsc_tick_count::now();
totalPauseTime = 0;
if(timeStat::logEvent(timerEnumValue)) {
__kmp_stats_thread_ptr->incrementNestValue();
}
Expand All @@ -170,7 +171,7 @@ void explicitTimer::stop(timer_e timerEnumValue) {
tsc_tick_count finishTime = tsc_tick_count::now();

//stat->addSample ((tsc_tick_count::now() - startTime).ticks());
stat->addSample ((finishTime - startTime).ticks());
stat->addSample(((finishTime - startTime) - totalPauseTime).ticks());

if(timeStat::logEvent(timerEnumValue)) {
__kmp_stats_thread_ptr->push_event(startTime.getValue() - __kmp_stats_start_time.getValue(), finishTime.getValue() - __kmp_stats_start_time.getValue(), __kmp_stats_thread_ptr->getNestValue(), timerEnumValue);
Expand All @@ -182,6 +183,74 @@ void explicitTimer::stop(timer_e timerEnumValue) {
return;
}

/* ************************************************************** */
/* ************* partitionedTimers member functions ************* */
partitionedTimers::partitionedTimers() {
timer_stack.reserve(8);
}

// add a timer to this collection of partitioned timers.
void partitionedTimers::add_timer(explicit_timer_e timer_index, explicitTimer* timer_pointer) {
KMP_DEBUG_ASSERT((int)timer_index < (int)EXPLICIT_TIMER_LAST+1);
timers[timer_index] = timer_pointer;
}

// initialize the paritioned timers to an initial timer
void partitionedTimers::init(timerPair init_timer_pair) {
KMP_DEBUG_ASSERT(this->timer_stack.size() == 0);
timer_stack.push_back(init_timer_pair);
timers[init_timer_pair.get_index()]->start(init_timer_pair.get_timer());
}

// stop/save the current timer, and start the new timer (timer_pair)
// There is a special condition where if the current timer is equal to
// the one you are trying to push, then it only manipulates the stack,
// and it won't stop/start the currently running timer.
void partitionedTimers::push(timerPair timer_pair) {
// get the current timer
// stop current timer
// push new timer
// start the new timer
KMP_DEBUG_ASSERT(this->timer_stack.size() > 0);
timerPair current_timer = timer_stack.back();
timer_stack.push_back(timer_pair);
if(current_timer != timer_pair) {
timers[current_timer.get_index()]->pause();
timers[timer_pair.get_index()]->start(timer_pair.get_timer());
}
}

// stop/discard the current timer, and start the previously saved timer
void partitionedTimers::pop() {
// get the current timer
// stop current timer
// pop current timer
// get the new current timer and start it back up
KMP_DEBUG_ASSERT(this->timer_stack.size() > 1);
timerPair current_timer = timer_stack.back();
timer_stack.pop_back();
timerPair new_timer = timer_stack.back();
if(current_timer != new_timer) {
timers[current_timer.get_index()]->stop(current_timer.get_timer());
timers[new_timer.get_index()]->resume();
}
}

// Wind up all the currently running timers.
// This pops off all the timers from the stack and clears the stack
// After this is called, init() must be run again to initialize the
// stack of timers
void partitionedTimers::windup() {
while(timer_stack.size() > 1) {
this->pop();
}
if(timer_stack.size() > 0) {
timerPair last_timer = timer_stack.back();
timer_stack.pop_back();
timers[last_timer.get_index()]->stop(last_timer.get_timer());
}
}

/* ******************************************************************* */
/* ************* kmp_stats_event_vector member functions ************* */

Expand Down Expand Up @@ -397,8 +466,10 @@ void kmp_stats_output_module::windupExplicitTimers()
// If the timer wasn't running, this won't record anything anyway.
kmp_stats_list::iterator it;
for(it = __kmp_stats_list.begin(); it != __kmp_stats_list.end(); it++) {
kmp_stats_list* ptr = *it;
ptr->getPartitionedTimers()->windup();
for (int timer=0; timer<EXPLICIT_TIMER_LAST; timer++) {
(*it)->getExplicitTimer(explicit_timer_e(timer))->stop((timer_e)timer);
ptr->getExplicitTimer(explicit_timer_e(timer))->stop((timer_e)timer);
}
}
}
Expand Down Expand Up @@ -595,11 +666,7 @@ void __kmp_reset_stats()

// reset the event vector so all previous events are "erased"
(*it)->resetEventVector();

// May need to restart the explicit timers in thread zero?
}
KMP_START_EXPLICIT_TIMER(OMP_serial);
KMP_START_EXPLICIT_TIMER(OMP_start_end);
}

// This function will reset all stats and stop all threads' explicit timers if they haven't been stopped already.
Expand Down

0 comments on commit 11dc82f

Please sign in to comment.