Skip to content

Commit

Permalink
[OpenMP] Let primary thread gather topology info for each worker thread
Browse files Browse the repository at this point in the history
This change has the primary thread create each thread's initial mask
and topology information so it is available immediately after
forking. Setting the mask/topology information is now decoupled from the
actual binding. The topology information is also set inside the
__kmp_partition_places mechanism for OMP_PLACES+OMP_PROC_BIND.

Without this, there could be a timing window, after the primary
thread signals the workers to fork, during which worker threads have not
yet established their affinity mask or topology information.

Each worker thread will then bind to the location the primary thread
sets.

Differential Revision: https://reviews.llvm.org/D156727
  • Loading branch information
jpeyton52 committed Aug 22, 2023
1 parent 3a4f471 commit 99f5969
Show file tree
Hide file tree
Showing 6 changed files with 56 additions and 63 deletions.
6 changes: 4 additions & 2 deletions openmp/runtime/src/kmp.h
Original file line number Diff line number Diff line change
Expand Up @@ -3795,7 +3795,8 @@ extern void __kmp_affinity_initialize(kmp_affinity_t &affinity);
extern void __kmp_affinity_uninitialize(void);
extern void __kmp_affinity_set_init_mask(
int gtid, int isa_root); /* set affinity according to KMP_AFFINITY */
extern void __kmp_affinity_set_place(int gtid);
void __kmp_affinity_bind_init_mask(int gtid);
extern void __kmp_affinity_bind_place(int gtid);
extern void __kmp_affinity_determine_capable(const char *env_var);
extern int __kmp_aux_set_affinity(void **mask);
extern int __kmp_aux_get_affinity(void **mask);
Expand All @@ -3811,7 +3812,8 @@ static inline void __kmp_assign_root_init_mask() {
int gtid = __kmp_entry_gtid();
kmp_root_t *r = __kmp_threads[gtid]->th.th_root;
if (r->r.r_uber_thread == __kmp_threads[gtid] && !r->r.r_affinity_assigned) {
__kmp_affinity_set_init_mask(gtid, TRUE);
__kmp_affinity_set_init_mask(gtid, /*isa_root=*/TRUE);
__kmp_affinity_bind_init_mask(gtid);
r->r.r_affinity_assigned = TRUE;
}
}
Expand Down
40 changes: 24 additions & 16 deletions openmp/runtime/src/kmp_affinity.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4260,8 +4260,8 @@ static void __kmp_affinity_get_topology_info(kmp_affinity_t &affinity) {

// Called when __kmp_topology is ready
static void __kmp_aux_affinity_initialize_other_data(kmp_affinity_t &affinity) {
// Initialize data dependent on __kmp_topology
if (__kmp_topology) {
// Initialize other data structures which depend on the topology
if (__kmp_topology && __kmp_topology->get_num_hw_threads()) {
machine_hierarchy.init(__kmp_topology->get_num_hw_threads());
__kmp_affinity_get_topology_info(affinity);
}
Expand Down Expand Up @@ -4527,8 +4527,6 @@ static void __kmp_aux_affinity_initialize(kmp_affinity_t &affinity) {
if (is_regular_affinity && !__kmp_topology) {
bool success = __kmp_aux_affinity_initialize_topology(affinity);
if (success) {
// Initialize other data structures which depend on the topology
machine_hierarchy.init(__kmp_topology->get_num_hw_threads());
KMP_ASSERT(__kmp_avail_proc == __kmp_topology->get_num_hw_threads());
} else {
affinity.type = affinity_none;
Expand Down Expand Up @@ -4866,14 +4864,12 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
kmp_affin_mask_t *mask;
int i;
const kmp_affinity_t *affinity;
const char *env_var;
bool is_hidden_helper = KMP_HIDDEN_HELPER_THREAD(gtid);

if (is_hidden_helper)
affinity = &__kmp_hh_affinity;
else
affinity = &__kmp_affinity;
env_var = __kmp_get_affinity_env_var(*affinity, /*for_binding=*/true);

if (KMP_AFFINITY_NON_PROC_BIND || is_hidden_helper) {
if ((affinity->type == affinity_none) ||
Expand Down Expand Up @@ -4923,19 +4919,34 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
}

if (i == KMP_PLACE_ALL) {
KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
KA_TRACE(100, ("__kmp_affinity_set_init_mask: setting T#%d to all places\n",
gtid));
} else {
KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
KA_TRACE(100, ("__kmp_affinity_set_init_mask: setting T#%d to place %d\n",
gtid, i));
}

KMP_CPU_COPY(th->th.th_affin_mask, mask);
}

void __kmp_affinity_bind_init_mask(int gtid) {
if (!KMP_AFFINITY_CAPABLE()) {
return;
}
kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
const kmp_affinity_t *affinity;
const char *env_var;
bool is_hidden_helper = KMP_HIDDEN_HELPER_THREAD(gtid);

if (is_hidden_helper)
affinity = &__kmp_hh_affinity;
else
affinity = &__kmp_affinity;
env_var = __kmp_get_affinity_env_var(*affinity, /*for_binding=*/true);
/* to avoid duplicate printing (will be correctly printed on barrier) */
if (affinity->flags.verbose &&
(affinity->type == affinity_none ||
(i != KMP_PLACE_ALL && affinity->type != affinity_balanced)) &&
if (affinity->flags.verbose && (affinity->type == affinity_none ||
(th->th.th_current_place != KMP_PLACE_ALL &&
affinity->type != affinity_balanced)) &&
!KMP_HIDDEN_HELPER_MAIN_THREAD(gtid)) {
char buf[KMP_AFFIN_MASK_PRINT_LEN];
__kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
Expand All @@ -4955,15 +4966,15 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
__kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
}

void __kmp_affinity_set_place(int gtid) {
void __kmp_affinity_bind_place(int gtid) {
// Hidden helper threads should not be affected by OMP_PLACES/OMP_PROC_BIND
if (!KMP_AFFINITY_CAPABLE() || KMP_HIDDEN_HELPER_THREAD(gtid)) {
return;
}

kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);

KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current "
KA_TRACE(100, ("__kmp_affinity_bind_place: binding T#%d to place %d (current "
"place = %d)\n",
gtid, th->th.th_new_place, th->th.th_current_place));

Expand All @@ -4985,9 +4996,6 @@ void __kmp_affinity_set_place(int gtid) {
KMP_CPU_INDEX(__kmp_affinity.masks, th->th.th_new_place);
KMP_CPU_COPY(th->th.th_affin_mask, mask);
th->th.th_current_place = th->th.th_new_place;
// Copy topology information associated with the place
th->th.th_topology_ids = __kmp_affinity.ids[th->th.th_new_place];
th->th.th_topology_attrs = __kmp_affinity.attrs[th->th.th_new_place];

if (__kmp_affinity.flags.verbose) {
char buf[KMP_AFFIN_MASK_PRINT_LEN];
Expand Down
2 changes: 1 addition & 1 deletion openmp/runtime/src/kmp_barrier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2591,7 +2591,7 @@ void __kmp_fork_barrier(int gtid, int tid) {
__kmp_gtid_from_thread(this_thr),
this_thr->th.th_current_place));
} else {
__kmp_affinity_set_place(gtid);
__kmp_affinity_bind_place(gtid);
}
}
#endif // KMP_AFFINITY_SUPPORTED
Expand Down
67 changes: 25 additions & 42 deletions openmp/runtime/src/kmp_runtime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4671,6 +4671,11 @@ kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
}
#endif /* KMP_ADJUST_BLOCKTIME */

#if KMP_AFFINITY_SUPPORTED
// Set the affinity and topology information for new thread
__kmp_affinity_set_init_mask(new_gtid, /*isa_root=*/FALSE);
#endif

/* actually fork it and create the new worker thread */
KF_TRACE(
10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr));
Expand Down Expand Up @@ -4764,6 +4769,19 @@ static void __kmp_initialize_team(kmp_team_t *team, int new_nproc,
}

#if KMP_AFFINITY_SUPPORTED
// Record the place partition [first, last] and the pending place (newp) for
// thread `th`.
//
// When the pending place differs from the thread's current place, this also:
//   - sets team->t.t_display_affinity to 1 if __kmp_display_affinity is set,
//   - copies the topology ids/attrs stored in __kmp_affinity for that place
//     into the thread structure.
//
// This function only stores values; it makes no binding call itself.
static inline void __kmp_set_thread_place(kmp_team_t *team, kmp_info_t *th,
                                          int first, int last, int newp) {
  th->th.th_first_place = first;
  th->th.th_last_place = last;
  th->th.th_new_place = newp;

  // Nothing else to update when the thread is already on the requested place.
  if (newp == th->th.th_current_place) {
    return;
  }

  // The thread's place is changing: request an affinity display for the team
  // if the feature is enabled and the flag is not already raised.
  if (__kmp_display_affinity && team->t.t_display_affinity != 1) {
    team->t.t_display_affinity = 1;
  }

  // Pick up the topology information that belongs to the new place.
  th->th.th_topology_ids = __kmp_affinity.ids[th->th.th_new_place];
  th->th.th_topology_attrs = __kmp_affinity.attrs[th->th.th_new_place];
}

// __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism.
// It calculates the worker + primary thread's partition based upon the parent
Expand Down Expand Up @@ -4803,13 +4821,7 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
for (f = 1; f < n_th; f++) {
kmp_info_t *th = team->t.t_threads[f];
KMP_DEBUG_ASSERT(th != NULL);
th->th.th_first_place = first_place;
th->th.th_last_place = last_place;
th->th.th_new_place = masters_place;
if (__kmp_display_affinity && masters_place != th->th.th_current_place &&
team->t.t_display_affinity != 1) {
team->t.t_display_affinity = 1;
}
__kmp_set_thread_place(team, th, first_place, last_place, masters_place);

KA_TRACE(100, ("__kmp_partition_places: primary: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n",
Expand Down Expand Up @@ -4840,13 +4852,7 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
} else {
place++;
}
th->th.th_first_place = first_place;
th->th.th_last_place = last_place;
th->th.th_new_place = place;
if (__kmp_display_affinity && place != th->th.th_current_place &&
team->t.t_display_affinity != 1) {
team->t.t_display_affinity = 1;
}
__kmp_set_thread_place(team, th, first_place, last_place, place);

KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n",
Expand All @@ -4865,13 +4871,7 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
kmp_info_t *th = team->t.t_threads[f];
KMP_DEBUG_ASSERT(th != NULL);

th->th.th_first_place = first_place;
th->th.th_last_place = last_place;
th->th.th_new_place = place;
if (__kmp_display_affinity && place != th->th.th_current_place &&
team->t.t_display_affinity != 1) {
team->t.t_display_affinity = 1;
}
__kmp_set_thread_place(team, th, first_place, last_place, place);
s_count++;

if ((s_count == S) && rem && (gap_ct == gap)) {
Expand Down Expand Up @@ -4938,12 +4938,7 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
kmp_info_t *th = team->t.t_threads[f];
KMP_DEBUG_ASSERT(th != NULL);

th->th.th_first_place = place;
th->th.th_new_place = place;
if (__kmp_display_affinity && place != th->th.th_current_place &&
team->t.t_display_affinity != 1) {
team->t.t_display_affinity = 1;
}
int fplace = place, nplace = place;
s_count = 1;
while (s_count < S) {
if (place == last_place) {
Expand All @@ -4966,7 +4961,7 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
rem--;
gap_ct = 0;
}
th->th.th_last_place = place;
__kmp_set_thread_place(team, th, fplace, place, nplace);
gap_ct++;

if (place == last_place) {
Expand Down Expand Up @@ -5032,13 +5027,7 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
KMP_DEBUG_ASSERT(last_place >= first_place);
th = team->t.t_threads[f];
KMP_DEBUG_ASSERT(th);
th->th.th_first_place = first;
th->th.th_new_place = place;
th->th.th_last_place = last;
if (__kmp_display_affinity && place != th->th.th_current_place &&
team->t.t_display_affinity != 1) {
team->t.t_display_affinity = 1;
}
__kmp_set_thread_place(team, th, first, last, place);
KA_TRACE(100,
("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
"partition = [%d,%d], spacing = %.4f\n",
Expand All @@ -5064,13 +5053,7 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
kmp_info_t *th = team->t.t_threads[f];
KMP_DEBUG_ASSERT(th != NULL);

th->th.th_first_place = place;
th->th.th_last_place = place;
th->th.th_new_place = place;
if (__kmp_display_affinity && place != th->th.th_current_place &&
team->t.t_display_affinity != 1) {
team->t.t_display_affinity = 1;
}
__kmp_set_thread_place(team, th, place, place, place);
s_count++;

if ((s_count == S) && rem && (gap_ct == gap)) {
Expand Down
2 changes: 1 addition & 1 deletion openmp/runtime/src/z_Linux_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -486,7 +486,7 @@ static void *__kmp_launch_worker(void *thr) {
#endif /* USE_ITT_BUILD */

#if KMP_AFFINITY_SUPPORTED
__kmp_affinity_set_init_mask(gtid, FALSE);
__kmp_affinity_bind_init_mask(gtid);
#endif

#ifdef KMP_CANCEL_THREADS
Expand Down
2 changes: 1 addition & 1 deletion openmp/runtime/src/z_Windows_NT_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1006,7 +1006,7 @@ extern "C" void *__stdcall __kmp_launch_worker(void *arg) {
__kmp_itt_thread_name(gtid);
#endif /* USE_ITT_BUILD */

__kmp_affinity_set_init_mask(gtid, FALSE);
__kmp_affinity_bind_init_mask(gtid);

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// Set FP control regs to be a copy of the parallel initialization thread's.
Expand Down

0 comments on commit 99f5969

Please sign in to comment.