Skip to content

Commit 5375fe8

Browse files
committed
Update stats-gathering code
Make the developer timers use the partitioned-timer scheme; this also required removing some redundant developer timers in favor of the already existing normal timers. Move per-thread stats initialization to just after global thread id assignment, which is as early as possible. Also put all global stats initialization code in __kmp_stats_init() and all global stats destruction code in __kmp_stats_fini(). Differential Revision: https://reviews.llvm.org/D26361 llvm-svn: 286892
1 parent f10a871 commit 5375fe8

File tree

6 files changed

+102
-117
lines changed

6 files changed

+102
-117
lines changed

openmp/runtime/src/kmp_barrier.cpp

Lines changed: 17 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ __kmp_linear_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid
5050
void (*reduce)(void *, void *)
5151
USE_ITT_BUILD_ARG(void * itt_sync_obj) )
5252
{
53-
KMP_TIME_DEVELOPER_BLOCK(KMP_linear_gather);
53+
KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_gather);
5454
register kmp_team_t *team = this_thr->th.th_team;
5555
register kmp_bstate_t *thr_bar = & this_thr->th.th_bar[bt].bb;
5656
register kmp_info_t **other_threads = team->t.t_threads;
@@ -130,7 +130,7 @@ __kmp_linear_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gti
130130
int propagate_icvs
131131
USE_ITT_BUILD_ARG(void *itt_sync_obj) )
132132
{
133-
KMP_TIME_DEVELOPER_BLOCK(KMP_linear_release);
133+
KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_release);
134134
register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
135135
register kmp_team_t *team;
136136

@@ -149,7 +149,7 @@ __kmp_linear_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gti
149149
if (nproc > 1) {
150150
#if KMP_BARRIER_ICV_PUSH
151151
{
152-
KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy);
152+
KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
153153
if (propagate_icvs) {
154154
ngo_load(&team->t.t_implicit_task_taskdata[0].td_icvs);
155155
for (i=1; i<nproc; ++i) {
@@ -225,7 +225,7 @@ __kmp_tree_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
225225
void (*reduce)(void *, void *)
226226
USE_ITT_BUILD_ARG(void *itt_sync_obj) )
227227
{
228-
KMP_TIME_DEVELOPER_BLOCK(KMP_tree_gather);
228+
KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_gather);
229229
register kmp_team_t *team = this_thr->th.th_team;
230230
register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
231231
register kmp_info_t **other_threads = team->t.t_threads;
@@ -323,7 +323,7 @@ __kmp_tree_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
323323
int propagate_icvs
324324
USE_ITT_BUILD_ARG(void *itt_sync_obj) )
325325
{
326-
KMP_TIME_DEVELOPER_BLOCK(KMP_tree_release);
326+
KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_release);
327327
register kmp_team_t *team;
328328
register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
329329
register kmp_uint32 nproc;
@@ -393,7 +393,7 @@ __kmp_tree_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
393393

394394
#if KMP_BARRIER_ICV_PUSH
395395
{
396-
KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy);
396+
KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
397397
if (propagate_icvs) {
398398
__kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[child_tid],
399399
team, child_tid, FALSE);
@@ -426,7 +426,7 @@ __kmp_hyper_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid,
426426
void (*reduce)(void *, void *)
427427
USE_ITT_BUILD_ARG(void *itt_sync_obj) )
428428
{
429-
KMP_TIME_DEVELOPER_BLOCK(KMP_hyper_gather);
429+
KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_gather);
430430
register kmp_team_t *team = this_thr->th.th_team;
431431
register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
432432
register kmp_info_t **other_threads = team->t.t_threads;
@@ -535,7 +535,7 @@ __kmp_hyper_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid
535535
int propagate_icvs
536536
USE_ITT_BUILD_ARG(void *itt_sync_obj) )
537537
{
538-
KMP_TIME_DEVELOPER_BLOCK(KMP_hyper_release);
538+
KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_release);
539539
register kmp_team_t *team;
540540
register kmp_bstate_t *thr_bar = & this_thr -> th.th_bar[ bt ].bb;
541541
register kmp_info_t **other_threads;
@@ -742,7 +742,7 @@ __kmp_hierarchical_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr,
742742
int gtid, int tid, void (*reduce) (void *, void *)
743743
USE_ITT_BUILD_ARG(void * itt_sync_obj) )
744744
{
745-
KMP_TIME_DEVELOPER_BLOCK(KMP_hier_gather);
745+
KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_gather);
746746
register kmp_team_t *team = this_thr->th.th_team;
747747
register kmp_bstate_t *thr_bar = & this_thr->th.th_bar[bt].bb;
748748
register kmp_uint32 nproc = this_thr->th.th_team_nproc;
@@ -883,7 +883,7 @@ __kmp_hierarchical_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, i
883883
int propagate_icvs
884884
USE_ITT_BUILD_ARG(void * itt_sync_obj) )
885885
{
886-
KMP_TIME_DEVELOPER_BLOCK(KMP_hier_release);
886+
KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_release);
887887
register kmp_team_t *team;
888888
register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb;
889889
register kmp_uint32 nproc;
@@ -1067,9 +1067,8 @@ int
10671067
__kmp_barrier(enum barrier_type bt, int gtid, int is_split, size_t reduce_size,
10681068
void *reduce_data, void (*reduce)(void *, void *))
10691069
{
1070-
KMP_TIME_DEVELOPER_BLOCK(KMP_barrier);
1071-
KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER);
10721070
KMP_TIME_PARTITIONED_BLOCK(OMP_plain_barrier);
1071+
KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER);
10731072
register int tid = __kmp_tid_from_gtid(gtid);
10741073
register kmp_info_t *this_thr = __kmp_threads[gtid];
10751074
register kmp_team_t *team = this_thr->th.th_team;
@@ -1333,7 +1332,8 @@ __kmp_barrier(enum barrier_type bt, int gtid, int is_split, size_t reduce_size,
13331332
void
13341333
__kmp_end_split_barrier(enum barrier_type bt, int gtid)
13351334
{
1336-
KMP_TIME_DEVELOPER_BLOCK(KMP_end_split_barrier);
1335+
KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_end_split_barrier);
1336+
KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER);
13371337
int tid = __kmp_tid_from_gtid(gtid);
13381338
kmp_info_t *this_thr = __kmp_threads[gtid];
13391339
kmp_team_t *team = this_thr->th.th_team;
@@ -1376,9 +1376,8 @@ __kmp_end_split_barrier(enum barrier_type bt, int gtid)
13761376
void
13771377
__kmp_join_barrier(int gtid)
13781378
{
1379-
KMP_TIME_PARTITIONED_BLOCK(OMP_fork_join_barrier);
1379+
KMP_TIME_PARTITIONED_BLOCK(OMP_join_barrier);
13801380
KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER);
1381-
KMP_TIME_DEVELOPER_BLOCK(KMP_join_barrier);
13821381
register kmp_info_t *this_thr = __kmp_threads[gtid];
13831382
register kmp_team_t *team;
13841383
register kmp_uint nproc;
@@ -1592,9 +1591,8 @@ __kmp_join_barrier(int gtid)
15921591
void
15931592
__kmp_fork_barrier(int gtid, int tid)
15941593
{
1595-
KMP_TIME_PARTITIONED_BLOCK(OMP_fork_join_barrier);
1594+
KMP_TIME_PARTITIONED_BLOCK(OMP_fork_barrier);
15961595
KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER);
1597-
KMP_TIME_DEVELOPER_BLOCK(KMP_fork_barrier);
15981596
kmp_info_t *this_thr = __kmp_threads[gtid];
15991597
kmp_team_t *team = (tid == 0) ? this_thr->th.th_team : NULL;
16001598
#if USE_ITT_BUILD
@@ -1707,7 +1705,7 @@ __kmp_fork_barrier(int gtid, int tid)
17071705
the fixed ICVs in the master's thread struct, because it is not always the case that the
17081706
threads arrays have been allocated when __kmp_fork_call() is executed. */
17091707
{
1710-
KMP_TIME_DEVELOPER_BLOCK(USER_icv_copy);
1708+
KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy);
17111709
if (!KMP_MASTER_TID(tid)) { // master thread already has ICVs
17121710
// Copy the initial ICVs from the master's thread struct to the implicit task for this tid.
17131711
KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n", gtid, tid));
@@ -1762,7 +1760,7 @@ __kmp_fork_barrier(int gtid, int tid)
17621760
void
17631761
__kmp_setup_icv_copy(kmp_team_t *team, int new_nproc, kmp_internal_control_t *new_icvs, ident_t *loc )
17641762
{
1765-
KMP_TIME_DEVELOPER_BLOCK(KMP_setup_icv_copy);
1763+
KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_setup_icv_copy);
17661764

17671765
KMP_DEBUG_ASSERT(team && new_nproc && new_icvs);
17681766
KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc);

openmp/runtime/src/kmp_global.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,10 @@ kmp_cpuinfo_t __kmp_cpuinfo = { 0 }; // Not initialized
2828
kmp_tas_lock_t __kmp_stats_lock;
2929

3030
// global list of per thread stats, the head is a sentinel node which accumulates all stats produced before __kmp_create_worker is called.
31-
kmp_stats_list __kmp_stats_list;
31+
kmp_stats_list* __kmp_stats_list;
3232

3333
// thread local pointer to stats node within list
34-
__thread kmp_stats_list* __kmp_stats_thread_ptr = &__kmp_stats_list;
34+
__thread kmp_stats_list* __kmp_stats_thread_ptr = NULL;
3535

3636
// gives reference tick for all events (considered the 0 tick)
3737
tsc_tick_count __kmp_stats_start_time;

openmp/runtime/src/kmp_runtime.c

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1417,7 +1417,7 @@ __kmp_fork_call(
14171417
kmp_hot_team_ptr_t **p_hot_teams;
14181418
#endif
14191419
{ // KMP_TIME_BLOCK
1420-
KMP_TIME_DEVELOPER_BLOCK(KMP_fork_call);
1420+
KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);
14211421
KMP_COUNT_VALUE(OMP_PARALLEL_args, argc);
14221422

14231423
KA_TRACE( 20, ("__kmp_fork_call: enter T#%d\n", gtid ));
@@ -2199,7 +2199,6 @@ __kmp_fork_call(
21992199
{
22002200
KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
22012201
KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
2202-
// KMP_TIME_DEVELOPER_BLOCK(USER_master_invoke);
22032202
if (! team->t.t_invoke( gtid )) {
22042203
KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" );
22052204
}
@@ -2258,7 +2257,7 @@ __kmp_join_call(ident_t *loc, int gtid
22582257
#endif /* OMP_40_ENABLED */
22592258
)
22602259
{
2261-
KMP_TIME_DEVELOPER_BLOCK(KMP_join_call);
2260+
KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call);
22622261
kmp_team_t *team;
22632262
kmp_team_t *parent_team;
22642263
kmp_info_t *master_th;
@@ -3681,6 +3680,13 @@ __kmp_register_root( int initial_thread )
36813680
KMP_DEBUG_ASSERT( ! root->r.r_root_team );
36823681
}
36833682

3683+
#if KMP_STATS_ENABLED
3684+
// Initialize stats as soon as possible (right after gtid assignment).
3685+
__kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid);
3686+
KMP_START_EXPLICIT_TIMER(OMP_worker_thread_life);
3687+
KMP_SET_THREAD_STATE(SERIAL_REGION);
3688+
KMP_INIT_PARTITIONED_TIMERS(OMP_serial);
3689+
#endif
36843690
__kmp_initialize_root( root );
36853691

36863692
/* setup new root thread structure */
@@ -4748,7 +4754,7 @@ __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc,
47484754
kmp_internal_control_t *new_icvs,
47494755
int argc USE_NESTED_HOT_ARG(kmp_info_t *master) )
47504756
{
4751-
KMP_TIME_DEVELOPER_BLOCK(KMP_allocate_team);
4757+
KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team);
47524758
int f;
47534759
kmp_team_t *team;
47544760
int use_hot_team = ! root->r.r_active;
@@ -5504,14 +5510,11 @@ __kmp_launch_thread( kmp_info_t *this_thr )
55045510
}
55055511
#endif
55065512

5507-
KMP_STOP_DEVELOPER_EXPLICIT_TIMER(USER_launch_thread_loop);
55085513
{
5509-
KMP_TIME_DEVELOPER_BLOCK(USER_worker_invoke);
55105514
KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
55115515
KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
55125516
rc = (*pteam)->t.t_invoke( gtid );
55135517
}
5514-
KMP_START_DEVELOPER_EXPLICIT_TIMER(USER_launch_thread_loop);
55155518
KMP_ASSERT( rc );
55165519

55175520
#if OMPT_SUPPORT
@@ -6332,7 +6335,7 @@ __kmp_do_serial_initialize( void )
63326335
#endif
63336336
#endif
63346337
#if KMP_STATS_ENABLED
6335-
__kmp_init_tas_lock( & __kmp_stats_lock );
6338+
__kmp_stats_init();
63366339
#endif
63376340
__kmp_init_lock( & __kmp_global_lock );
63386341
__kmp_init_queuing_lock( & __kmp_dispatch_lock );
@@ -7293,8 +7296,7 @@ __kmp_cleanup( void )
72937296
__kmp_i18n_catclose();
72947297

72957298
#if KMP_STATS_ENABLED
7296-
__kmp_accumulate_stats_at_exit();
7297-
__kmp_stats_list.deallocate();
7299+
__kmp_stats_fini();
72987300
#endif
72997301

73007302
KA_TRACE( 10, ("__kmp_cleanup: exit\n" ) );

openmp/runtime/src/kmp_stats.cpp

Lines changed: 26 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,11 @@
2929
#define expandName(name,flags,ignore) {STRINGIZE(name),flags},
3030
statInfo timeStat::timerInfo[] = {
3131
KMP_FOREACH_TIMER(expandName,0)
32-
{0,0}
32+
{"TIMER_LAST", 0}
3333
};
3434
const statInfo counter::counterInfo[] = {
3535
KMP_FOREACH_COUNTER(expandName,0)
36-
{0,0}
36+
{"COUNTER_LAST", 0}
3737
};
3838
#undef expandName
3939

@@ -71,7 +71,7 @@ const kmp_stats_output_module::rgb_color kmp_stats_output_module::globalColorArr
7171
static uint32_t statsPrinted = 0;
7272

7373
// output interface
74-
static kmp_stats_output_module __kmp_stats_global_output;
74+
static kmp_stats_output_module* __kmp_stats_global_output = NULL;
7575

7676
/* ****************************************************** */
7777
/* ************* statistic member functions ************* */
@@ -164,7 +164,7 @@ void explicitTimer::start(timer_e timerEnumValue) {
164164
return;
165165
}
166166

167-
void explicitTimer::stop(timer_e timerEnumValue) {
167+
void explicitTimer::stop(timer_e timerEnumValue, kmp_stats_list* stats_ptr /* = nullptr */) {
168168
if (startTime.getValue() == 0)
169169
return;
170170

@@ -174,8 +174,10 @@ void explicitTimer::stop(timer_e timerEnumValue) {
174174
stat->addSample(((finishTime - startTime) - totalPauseTime).ticks());
175175

176176
if(timeStat::logEvent(timerEnumValue)) {
177-
__kmp_stats_thread_ptr->push_event(startTime.getValue() - __kmp_stats_start_time.getValue(), finishTime.getValue() - __kmp_stats_start_time.getValue(), __kmp_stats_thread_ptr->getNestValue(), timerEnumValue);
178-
__kmp_stats_thread_ptr->decrementNestValue();
177+
if(!stats_ptr)
178+
stats_ptr = __kmp_stats_thread_ptr;
179+
stats_ptr->push_event(startTime.getValue() - __kmp_stats_start_time.getValue(), finishTime.getValue() - __kmp_stats_start_time.getValue(), __kmp_stats_thread_ptr->getNestValue(), timerEnumValue);
180+
stats_ptr->decrementNestValue();
179181
}
180182

181183
/* We accept the risk that we drop a sample because it really did start at t==0. */
@@ -481,18 +483,18 @@ void kmp_stats_output_module::windupExplicitTimers()
481483
// and say "it's over".
482484
// If the timer wasn't running, this won't record anything anyway.
483485
kmp_stats_list::iterator it;
484-
for(it = __kmp_stats_list.begin(); it != __kmp_stats_list.end(); it++) {
486+
for(it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) {
485487
kmp_stats_list* ptr = *it;
486488
ptr->getPartitionedTimers()->windup();
487489
for (int timer=0; timer<EXPLICIT_TIMER_LAST; timer++) {
488-
ptr->getExplicitTimer(explicit_timer_e(timer))->stop((timer_e)timer);
490+
ptr->getExplicitTimer(explicit_timer_e(timer))->stop((timer_e)timer, ptr);
489491
}
490492
}
491493
}
492494

493495
void kmp_stats_output_module::printPloticusFile() {
494496
int i;
495-
int size = __kmp_stats_list.size();
497+
int size = __kmp_stats_list->size();
496498
FILE* plotOut = fopen(plotFileName, "w+");
497499

498500
fprintf(plotOut, "#proc page\n"
@@ -602,7 +604,7 @@ void kmp_stats_output_module::outputStats(const char* heading)
602604
fprintf(statsOut, "%s\n",heading);
603605
// Accumulate across threads.
604606
kmp_stats_list::iterator it;
605-
for (it = __kmp_stats_list.begin(); it != __kmp_stats_list.end(); it++) {
607+
for (it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) {
606608
int t = (*it)->getGtid();
607609
// Output per thread stats if requested.
608610
if (printPerThreadFlag) {
@@ -666,7 +668,7 @@ extern "C" {
666668
void __kmp_reset_stats()
667669
{
668670
kmp_stats_list::iterator it;
669-
for(it = __kmp_stats_list.begin(); it != __kmp_stats_list.end(); it++) {
671+
for(it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) {
670672
timeStat * timers = (*it)->getTimers();
671673
counter * counters = (*it)->getCounters();
672674
explicitTimer * eTimers = (*it)->getExplicitTimers();
@@ -688,7 +690,7 @@ void __kmp_reset_stats()
688690
// This function will reset all stats and stop all threads' explicit timers if they haven't been stopped already.
689691
void __kmp_output_stats(const char * heading)
690692
{
691-
__kmp_stats_global_output.outputStats(heading);
693+
__kmp_stats_global_output->outputStats(heading);
692694
__kmp_reset_stats();
693695
}
694696

@@ -703,6 +705,18 @@ void __kmp_accumulate_stats_at_exit(void)
703705

704706
void __kmp_stats_init(void)
705707
{
708+
__kmp_init_tas_lock( & __kmp_stats_lock );
709+
__kmp_stats_start_time = tsc_tick_count::now();
710+
__kmp_stats_global_output = new kmp_stats_output_module();
711+
__kmp_stats_list = new kmp_stats_list();
712+
}
713+
714+
void __kmp_stats_fini(void)
715+
{
716+
__kmp_accumulate_stats_at_exit();
717+
__kmp_stats_list->deallocate();
718+
delete __kmp_stats_global_output;
719+
delete __kmp_stats_list;
706720
}
707721

708722
} // extern "C"

0 commit comments

Comments
 (0)