Skip to content

Commit 7a9643f

Browse files
author
Jonathan Peyton
committed
[OpenMP][libomp] Add hidden helper affinity
Add new hidden helper affinity via the environment variable, KMP_HIDDEN_HELPER_AFFINITY, which allows users to assign thread affinity to hidden helper threads using the same syntax as KMP_AFFINITY. OMP_PLACES/OMP_PROC_BIND have no interaction with KMP_HIDDEN_HELPER_AFFINITY. Differential Revision: https://reviews.llvm.org/D135113
1 parent b03d67f commit 7a9643f

File tree

9 files changed

+265
-56
lines changed

9 files changed

+265
-56
lines changed

openmp/docs/design/Runtimes.rst

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -374,6 +374,24 @@ The ``offset`` specifier indicates the starting position for thread assignment.
374374
across one socket, and ``granularity=socket`` the runtime will shift the
375375
granularity down to group since that is the largest granularity allowed by the OS.
376376

377+
KMP_HIDDEN_HELPER_AFFINITY (Windows, Linux)
378+
"""""""""""""""""""""""""""""""""""""""""""
379+
380+
Enables run-time library to bind hidden helper threads to physical processing units.
381+
This environment variable has the same syntax and semantics as ``KMP_AFFINITY`` but only
382+
applies to the hidden helper team.
383+
384+
You must set this environment variable before the first parallel region, or
385+
certain API calls including ``omp_get_max_threads()``, ``omp_get_num_procs()``
386+
and any affinity API calls.
387+
388+
**Syntax:** Same as ``KMP_AFFINITY``
389+
390+
The following ``modifiers`` are ignored in ``KMP_HIDDEN_HELPER_AFFINITY`` and are only valid
391+
for ``KMP_AFFINITY``:
392+
* ``respect`` and ``norespect``
393+
* ``reset`` and ``noreset``
394+
377395
KMP_ALL_THREADS
378396
"""""""""""""""
379397

openmp/runtime/src/kmp.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -860,6 +860,8 @@ typedef struct kmp_affinity_t {
860860

861861
extern enum affinity_top_method __kmp_affinity_top_method;
862862
extern kmp_affinity_t __kmp_affinity;
863+
extern kmp_affinity_t __kmp_hh_affinity;
864+
extern kmp_affinity_t *__kmp_affinities[2];
863865

864866
extern void __kmp_affinity_bind_thread(int which);
865867

@@ -4257,6 +4259,9 @@ extern void __kmp_hidden_helper_main_thread_release();
42574259
#define KMP_HIDDEN_HELPER_WORKER_THREAD(gtid) \
42584260
((gtid) > 1 && (gtid) <= __kmp_hidden_helper_threads_num)
42594261

4262+
#define KMP_HIDDEN_HELPER_MAIN_THREAD(gtid) \
4263+
((gtid) == 1 && (gtid) <= __kmp_hidden_helper_threads_num)
4264+
42604265
#define KMP_HIDDEN_HELPER_TEAM(team) \
42614266
(team->t.t_threads[0] == __kmp_hidden_helper_main_thread)
42624267

openmp/runtime/src/kmp_affinity.cpp

Lines changed: 51 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -174,17 +174,18 @@ int kmp_hw_thread_t::compare_compact(const void *a, const void *b) {
174174
const kmp_hw_thread_t *aa = (const kmp_hw_thread_t *)a;
175175
const kmp_hw_thread_t *bb = (const kmp_hw_thread_t *)b;
176176
int depth = __kmp_topology->get_depth();
177-
KMP_DEBUG_ASSERT(__kmp_affinity.compact >= 0);
178-
KMP_DEBUG_ASSERT(__kmp_affinity.compact <= depth);
179-
for (i = 0; i < __kmp_affinity.compact; i++) {
177+
int compact = __kmp_topology->compact;
178+
KMP_DEBUG_ASSERT(compact >= 0);
179+
KMP_DEBUG_ASSERT(compact <= depth);
180+
for (i = 0; i < compact; i++) {
180181
int j = depth - i - 1;
181182
if (aa->sub_ids[j] < bb->sub_ids[j])
182183
return -1;
183184
if (aa->sub_ids[j] > bb->sub_ids[j])
184185
return 1;
185186
}
186187
for (; i < depth; i++) {
187-
int j = i - __kmp_affinity.compact;
188+
int j = i - compact;
188189
if (aa->sub_ids[j] < bb->sub_ids[j])
189190
return -1;
190191
if (aa->sub_ids[j] > bb->sub_ids[j])
@@ -583,6 +584,7 @@ kmp_topology_t *kmp_topology_t::allocate(int nproc, int ndepth,
583584
retval->count = arr + 2 * (size_t)KMP_HW_LAST;
584585
retval->num_core_efficiencies = 0;
585586
retval->num_core_types = 0;
587+
retval->compact = 0;
586588
for (int i = 0; i < KMP_HW_MAX_NUM_CORE_TYPES; ++i)
587589
retval->core_types[i] = KMP_HW_CORE_TYPE_UNKNOWN;
588590
KMP_FOREACH_HW_TYPE(type) { retval->equivalent[type] = KMP_HW_UNKNOWN; }
@@ -4287,6 +4289,7 @@ static bool __kmp_aux_affinity_initialize_topology(kmp_affinity_t &affinity) {
42874289

42884290
static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) {
42894291
bool is_regular_affinity = (&affinity == &__kmp_affinity);
4292+
bool is_hidden_helper_affinity = (&affinity == &__kmp_hh_affinity);
42904293
const char *env_var = affinity.env_var;
42914294

42924295
if (affinity.flags.initialized) {
@@ -4335,7 +4338,8 @@ static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) {
43354338

43364339
case affinity_explicit:
43374340
KMP_DEBUG_ASSERT(affinity.proclist != NULL);
4338-
if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel) {
4341+
if (is_hidden_helper_affinity ||
4342+
__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel) {
43394343
__kmp_affinity_process_proclist(affinity);
43404344
} else {
43414345
__kmp_affinity_process_placelist(affinity);
@@ -4391,7 +4395,7 @@ static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) {
43914395
goto sortTopology;
43924396

43934397
case affinity_balanced:
4394-
if (depth <= 1) {
4398+
if (depth <= 1 || is_hidden_helper_affinity) {
43954399
KMP_AFF_WARNING(affinity, AffBalancedNotAvail, env_var);
43964400
affinity.type = affinity_none;
43974401
__kmp_create_affinity_none_places(affinity);
@@ -4451,15 +4455,16 @@ static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) {
44514455

44524456
if ((__kmp_nested_proc_bind.bind_types[0] != proc_bind_intel) &&
44534457
(__kmp_affinity_num_places > 0) &&
4454-
((unsigned)__kmp_affinity_num_places < affinity.num_masks)) {
4458+
((unsigned)__kmp_affinity_num_places < affinity.num_masks) &&
4459+
!is_hidden_helper_affinity) {
44554460
affinity.num_masks = __kmp_affinity_num_places;
44564461
}
44574462

44584463
KMP_CPU_ALLOC_ARRAY(affinity.masks, affinity.num_masks);
44594464

44604465
// Sort the topology table according to the current setting of
44614466
// affinity.compact, then fill out affinity.masks.
4462-
__kmp_topology->sort_compact();
4467+
__kmp_topology->sort_compact(affinity);
44634468
{
44644469
int i;
44654470
unsigned j;
@@ -4510,8 +4515,7 @@ void __kmp_affinity_initialize(kmp_affinity_t &affinity) {
45104515
}
45114516

45124517
void __kmp_affinity_uninitialize(void) {
4513-
{
4514-
kmp_affinity_t *affinity = &__kmp_affinity;
4518+
for (kmp_affinity_t *affinity : __kmp_affinities) {
45154519
if (affinity->masks != NULL)
45164520
KMP_CPU_FREE_ARRAY(affinity->masks, affinity->num_masks);
45174521
if (affinity->os_id_masks != NULL)
@@ -4546,6 +4550,21 @@ void __kmp_affinity_uninitialize(void) {
45464550
KMPAffinity::destroy_api();
45474551
}
45484552

4553+
static void __kmp_select_mask_by_gtid(int gtid, const kmp_affinity_t *affinity,
4554+
int *place, kmp_affin_mask_t **mask) {
4555+
int mask_idx;
4556+
bool is_hidden_helper = KMP_HIDDEN_HELPER_THREAD(gtid);
4557+
if (is_hidden_helper)
4558+
// The first gtid is the regular primary thread, the second gtid is the main
4559+
// thread of hidden team which does not participate in task execution.
4560+
mask_idx = gtid - 2;
4561+
else
4562+
mask_idx = __kmp_adjust_gtid_for_hidden_helpers(gtid);
4563+
KMP_DEBUG_ASSERT(affinity->num_masks > 0);
4564+
*place = (mask_idx + affinity->offset) % affinity->num_masks;
4565+
*mask = KMP_CPU_INDEX(affinity->masks, *place);
4566+
}
4567+
45494568
void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
45504569
if (!KMP_AFFINITY_CAPABLE()) {
45514570
return;
@@ -4565,13 +4584,20 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
45654584
// same as the mask of the initialization thread.
45664585
kmp_affin_mask_t *mask;
45674586
int i;
4568-
const kmp_affinity_t *affinity = &__kmp_affinity;
4569-
const char *env_var = affinity->env_var;
4587+
const kmp_affinity_t *affinity;
4588+
const char *env_var;
4589+
bool is_hidden_helper = KMP_HIDDEN_HELPER_THREAD(gtid);
45704590

4571-
if (KMP_AFFINITY_NON_PROC_BIND) {
4591+
if (is_hidden_helper)
4592+
affinity = &__kmp_hh_affinity;
4593+
else
4594+
affinity = &__kmp_affinity;
4595+
env_var = affinity->env_var;
4596+
4597+
if (KMP_AFFINITY_NON_PROC_BIND || is_hidden_helper) {
45724598
if ((affinity->type == affinity_none) ||
45734599
(affinity->type == affinity_balanced) ||
4574-
KMP_HIDDEN_HELPER_THREAD(gtid)) {
4600+
KMP_HIDDEN_HELPER_MAIN_THREAD(gtid)) {
45754601
#if KMP_GROUP_AFFINITY
45764602
if (__kmp_num_proc_groups > 1) {
45774603
return;
@@ -4581,14 +4607,10 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
45814607
i = 0;
45824608
mask = __kmp_affin_fullMask;
45834609
} else {
4584-
int mask_idx = __kmp_adjust_gtid_for_hidden_helpers(gtid);
4585-
KMP_DEBUG_ASSERT(affinity->num_masks > 0);
4586-
i = (mask_idx + affinity->offset) % affinity->num_masks;
4587-
mask = KMP_CPU_INDEX(affinity->masks, i);
4610+
__kmp_select_mask_by_gtid(gtid, affinity, &i, &mask);
45884611
}
45894612
} else {
4590-
if ((!isa_root) || KMP_HIDDEN_HELPER_THREAD(gtid) ||
4591-
(__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
4613+
if (!isa_root || __kmp_nested_proc_bind.bind_types[0] == proc_bind_false) {
45924614
#if KMP_GROUP_AFFINITY
45934615
if (__kmp_num_proc_groups > 1) {
45944616
return;
@@ -4598,17 +4620,12 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
45984620
i = KMP_PLACE_ALL;
45994621
mask = __kmp_affin_fullMask;
46004622
} else {
4601-
// int i = some hash function or just a counter that doesn't
4602-
// always start at 0. Use adjusted gtid for now.
4603-
int mask_idx = __kmp_adjust_gtid_for_hidden_helpers(gtid);
4604-
KMP_DEBUG_ASSERT(affinity->num_masks > 0);
4605-
i = (mask_idx + affinity->offset) % affinity->num_masks;
4606-
mask = KMP_CPU_INDEX(affinity->masks, i);
4623+
__kmp_select_mask_by_gtid(gtid, affinity, &i, &mask);
46074624
}
46084625
}
46094626

46104627
th->th.th_current_place = i;
4611-
if (isa_root || KMP_HIDDEN_HELPER_THREAD(gtid)) {
4628+
if (isa_root && !is_hidden_helper) {
46124629
th->th.th_new_place = i;
46134630
th->th.th_first_place = 0;
46144631
th->th.th_last_place = affinity->num_masks - 1;
@@ -4629,28 +4646,18 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
46294646

46304647
KMP_CPU_COPY(th->th.th_affin_mask, mask);
46314648

4632-
if (affinity->flags.verbose && !KMP_HIDDEN_HELPER_THREAD(gtid)
4633-
/* to avoid duplicate printing (will be correctly printed on barrier) */
4634-
&& (affinity->type == affinity_none ||
4635-
(i != KMP_PLACE_ALL && affinity->type != affinity_balanced))) {
4649+
/* to avoid duplicate printing (will be correctly printed on barrier) */
4650+
if (affinity->flags.verbose &&
4651+
(affinity->type == affinity_none ||
4652+
(i != KMP_PLACE_ALL && affinity->type != affinity_balanced)) &&
4653+
!KMP_HIDDEN_HELPER_MAIN_THREAD(gtid)) {
46364654
char buf[KMP_AFFIN_MASK_PRINT_LEN];
46374655
__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
46384656
th->th.th_affin_mask);
46394657
KMP_INFORM(BoundToOSProcSet, env_var, (kmp_int32)getpid(), __kmp_gettid(),
46404658
gtid, buf);
46414659
}
46424660

4643-
#if KMP_DEBUG
4644-
// Hidden helper thread affinity only printed for debug builds
4645-
if (affinity->flags.verbose && KMP_HIDDEN_HELPER_THREAD(gtid)) {
4646-
char buf[KMP_AFFIN_MASK_PRINT_LEN];
4647-
__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4648-
th->th.th_affin_mask);
4649-
KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY (hidden helper thread)",
4650-
(kmp_int32)getpid(), __kmp_gettid(), gtid, buf);
4651-
}
4652-
#endif
4653-
46544661
#if KMP_OS_WINDOWS
46554662
// On Windows* OS, the process affinity mask might have changed. If the user
46564663
// didn't request affinity and this call fails, just continue silently.
@@ -4663,7 +4670,8 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
46634670
}
46644671

46654672
void __kmp_affinity_set_place(int gtid) {
4666-
if (!KMP_AFFINITY_CAPABLE()) {
4673+
// Hidden helper threads should not be affected by OMP_PLACES/OMP_PROC_BIND
4674+
if (!KMP_AFFINITY_CAPABLE() || KMP_HIDDEN_HELPER_THREAD(gtid)) {
46674675
return;
46684676
}
46694677

openmp/runtime/src/kmp_affinity.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -724,6 +724,9 @@ class kmp_topology_t {
724724
// Flags describing the topology
725725
flags_t flags;
726726

727+
// Compact value used during sort_compact()
728+
int compact;
729+
727730
// Insert a new topology layer after allocation
728731
void _insert_layer(kmp_hw_t type, const int *ids);
729732

@@ -866,7 +869,9 @@ class kmp_topology_t {
866869
}
867870

868871
#if KMP_AFFINITY_SUPPORTED
869-
void sort_compact() {
872+
friend int kmp_hw_thread_t::compare_compact(const void *a, const void *b);
873+
void sort_compact(kmp_affinity_t &affinity) {
874+
compact = affinity.compact;
870875
qsort(hw_threads, num_hw_threads, sizeof(kmp_hw_thread_t),
871876
kmp_hw_thread_t::compare_compact);
872877
}

openmp/runtime/src/kmp_global.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,6 +274,10 @@ enum affinity_top_method __kmp_affinity_top_method =
274274

275275
// Regular thread affinity settings from KMP_AFFINITY
276276
kmp_affinity_t __kmp_affinity = KMP_AFFINITY_INIT("KMP_AFFINITY");
277+
// Hidden helper thread affinity settings from KMP_HIDDEN_HELPER_AFFINITY
278+
kmp_affinity_t __kmp_hh_affinity =
279+
KMP_AFFINITY_INIT("KMP_HIDDEN_HELPER_AFFINITY");
280+
kmp_affinity_t *__kmp_affinities[] = {&__kmp_affinity, &__kmp_hh_affinity};
277281

278282
char *__kmp_cpuinfo_file = NULL;
279283

openmp/runtime/src/kmp_runtime.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7467,6 +7467,14 @@ void __kmp_hidden_helper_initialize() {
74677467
return;
74687468
}
74697469

7470+
#if KMP_AFFINITY_SUPPORTED
7471+
// Initialize hidden helper affinity settings.
7472+
// The above __kmp_parallel_initialize() will initialize
7473+
// regular affinity (and topology) if not already done.
7474+
if (!__kmp_hh_affinity.flags.initialized)
7475+
__kmp_affinity_initialize(__kmp_hh_affinity);
7476+
#endif
7477+
74707478
// Set the count of hidden helper tasks to be executed to zero
74717479
KMP_ATOMIC_ST_REL(&__kmp_unexecuted_hidden_helper_tasks, 0);
74727480

0 commit comments

Comments
 (0)