Skip to content

Commit

Permalink
[OpenMP] Add OpenMP 5.0 nonmonotonic code
Browse files Browse the repository at this point in the history
This patch adds:
* New omp_sched_monotonic flag to omp_sched_t which is handled within the runtime
* Parsing of monotonic/nonmonotonic in OMP_SCHEDULE
* Tests for the monotonic flag and environment-variable (OMP_SCHEDULE) parsing
* Logic to force monotonic when hierarchical scheduling is used

Differential Revision: https://reviews.llvm.org/D60979

llvm-svn: 359601
  • Loading branch information
jpeyton52 committed Apr 30, 2019
1 parent 3a7532e commit 71abe28
Show file tree
Hide file tree
Showing 11 changed files with 456 additions and 95 deletions.
9 changes: 5 additions & 4 deletions openmp/runtime/src/include/50/omp.h.var
Expand Up @@ -43,10 +43,11 @@

/* schedule kind constants */
/* schedule kind constants
 * The diff residue left both the pre- and post-change enumerator lists in
 * place, producing duplicate enumerators (invalid C); this is the final,
 * deduplicated OpenMP 5.0 version with the monotonic modifier bit added. */
typedef enum omp_sched_t {
    omp_sched_static = 1,            /* static schedule */
    omp_sched_dynamic = 2,           /* dynamic schedule */
    omp_sched_guided = 3,            /* guided schedule */
    omp_sched_auto = 4,              /* implementation chooses */
    /* OpenMP 5.0 modifier: high bit OR-ed onto one of the kinds above to
       request monotonic iteration ordering. */
    omp_sched_monotonic = 0x80000000
} omp_sched_t;

/* set API functions */
Expand Down
1 change: 1 addition & 0 deletions openmp/runtime/src/include/50/omp_lib.f.var
Expand Up @@ -61,6 +61,7 @@
integer(kind=omp_sched_kind), parameter :: omp_sched_dynamic = 2
integer(kind=omp_sched_kind), parameter :: omp_sched_guided = 3
integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4
integer(kind=omp_sched_kind), parameter :: omp_sched_monotonic = Z'80000000'

integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_false = 0
integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_true = 1
Expand Down
2 changes: 1 addition & 1 deletion openmp/runtime/src/include/50/omp_lib.f90.var
Expand Up @@ -59,7 +59,7 @@
integer(kind=omp_sched_kind), parameter :: omp_sched_dynamic = 2
integer(kind=omp_sched_kind), parameter :: omp_sched_guided = 3
integer(kind=omp_sched_kind), parameter :: omp_sched_auto = 4

integer(kind=omp_sched_kind), parameter :: omp_sched_monotonic = Z'80000000'

integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_false = 0
integer (kind=omp_proc_bind_kind), parameter :: omp_proc_bind_true = 1
Expand Down
2 changes: 2 additions & 0 deletions openmp/runtime/src/include/50/omp_lib.h.var
Expand Up @@ -68,6 +68,8 @@
parameter(omp_sched_guided=3)
integer(kind=omp_sched_kind)omp_sched_auto
parameter(omp_sched_auto=4)
integer(kind=omp_sched_kind)omp_sched_monotonic
parameter(omp_sched_monotonic=Z'80000000')

integer(kind=omp_proc_bind_kind)omp_proc_bind_false
parameter(omp_proc_bind_false=0)
Expand Down
44 changes: 43 additions & 1 deletion openmp/runtime/src/kmp.h
Expand Up @@ -325,7 +325,8 @@ typedef enum kmp_sched {
kmp_sched_static_steal = 102, // mapped to kmp_sch_static_steal (44)
#endif
kmp_sched_upper,
kmp_sched_default = kmp_sched_static // default scheduling
kmp_sched_default = kmp_sched_static, // default scheduling
kmp_sched_monotonic = 0x80000000
} kmp_sched_t;
#endif

Expand Down Expand Up @@ -438,6 +439,11 @@ enum sched_type : kmp_int32 {
#define SCHEDULE_HAS_NONMONOTONIC(s) (((s)&kmp_sch_modifier_nonmonotonic) != 0)
#define SCHEDULE_HAS_NO_MODIFIERS(s) \
(((s) & (kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic)) == 0)
#define SCHEDULE_GET_MODIFIERS(s) \
((enum sched_type)( \
(s) & (kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic)))
#define SCHEDULE_SET_MODIFIERS(s, m) \
(s = (enum sched_type)((kmp_int32)s | (kmp_int32)m))
#else
/* By doing this we hope to avoid multiple tests on OMP_45_ENABLED. Compilers
can now eliminate tests on compile time constants and dead code that results
Expand All @@ -446,11 +452,47 @@ enum sched_type : kmp_int32 {
#define SCHEDULE_HAS_MONOTONIC(s) false
#define SCHEDULE_HAS_NONMONOTONIC(s) false
#define SCHEDULE_HAS_NO_MODIFIERS(s) true
#define SCHEDULE_GET_MODIFIERS(s) ((enum sched_type)0)
#define SCHEDULE_SET_MODIFIERS(s, m) /* Nothing */
#endif
#define SCHEDULE_NONMONOTONIC 0
#define SCHEDULE_MONOTONIC 1

kmp_sch_default = kmp_sch_static /**< default scheduling algorithm */
};

// Apply modifiers on internal kind to standard kind
// Transfer the monotonic modifier, if set on the internal schedule kind,
// onto the user-visible (standard) schedule kind.
static inline void __kmp_sched_apply_mods_stdkind(kmp_sched_t *kind,
                                                  enum sched_type internal_kind) {
#if OMP_50_ENABLED
  if (SCHEDULE_HAS_MONOTONIC(internal_kind))
    *kind = (kmp_sched_t)((int)*kind | (int)kmp_sched_monotonic);
#endif
}

// Apply modifiers on standard kind to internal kind
// Propagate the monotonic modifier from a standard (user-visible) schedule
// kind into the corresponding internal schedule kind.
static inline void __kmp_sched_apply_mods_intkind(kmp_sched_t kind,
                                                  enum sched_type *internal_kind) {
#if OMP_50_ENABLED
  const int monotonic_requested = (int)kind & (int)kmp_sched_monotonic;
  if (monotonic_requested)
    *internal_kind = (enum sched_type)((int)*internal_kind |
                                       (int)kmp_sch_modifier_monotonic);
#endif
}

// Get standard schedule without modifiers
static inline kmp_sched_t __kmp_sched_without_mods(kmp_sched_t kind) {
#if OMP_50_ENABLED
return (kmp_sched_t)((int)kind & ~((int)kmp_sched_monotonic));
#else
return kind;
#endif
}

/* Type to keep runtime schedule set via OMP_SCHEDULE or omp_set_schedule() */
typedef union kmp_r_sched {
struct {
Expand Down
63 changes: 50 additions & 13 deletions openmp/runtime/src/kmp_dispatch.cpp
Expand Up @@ -68,6 +68,20 @@ void __kmp_dispatch_dxo_error(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
}
}

// Returns either SCHEDULE_MONOTONIC or SCHEDULE_NONMONOTONIC
// Decide the effective monotonicity of a schedule.
// Returns either SCHEDULE_MONOTONIC or SCHEDULE_NONMONOTONIC, based on the
// nonmonotonic/monotonic modifier bits carried by `schedule`.
// NOTE(review): the `use_hier` parameter was previously accepted but ignored;
// per this change's stated intent ("force monotonic when hierarchical
// scheduling is used"), hierarchical scheduling now overrides any
// nonmonotonic request, since the hierarchy does not support the
// nonmonotonic (static-steal) dispatch path.
static inline int __kmp_get_monotonicity(enum sched_type schedule,
                                         bool use_hier = false) {
  // Default to monotonic unless the schedule explicitly asks otherwise.
  int monotonicity = SCHEDULE_MONOTONIC;
  if (SCHEDULE_HAS_NONMONOTONIC(schedule))
    monotonicity = SCHEDULE_NONMONOTONIC;
  else if (SCHEDULE_HAS_MONOTONIC(schedule))
    monotonicity = SCHEDULE_MONOTONIC;
  // Hierarchical scheduling forces monotonic ordering.
  if (use_hier)
    monotonicity = SCHEDULE_MONOTONIC;
  return monotonicity;
}

// Initialize a dispatch_private_info_template<T> buffer for a particular
// type of schedule,chunk. The loop description is found in lb (lower bound),
// ub (upper bound), and st (stride). nproc is the number of threads relevant
Expand Down Expand Up @@ -95,6 +109,8 @@ void __kmp_dispatch_init_algorithm(ident_t *loc, int gtid,
T tc;
kmp_info_t *th;
kmp_team_t *team;
int monotonicity;
bool use_hier;

#ifdef KMP_DEBUG
typedef typename traits_t<T>::signed_t ST;
Expand Down Expand Up @@ -125,13 +141,16 @@ void __kmp_dispatch_init_algorithm(ident_t *loc, int gtid,
#endif
team->t.t_active_level == 1;
#endif
#if (KMP_STATIC_STEAL_ENABLED)
if (SCHEDULE_HAS_NONMONOTONIC(schedule))
// AC: we now have only one implementation of stealing, so use it
schedule = kmp_sch_static_steal;
else

#if KMP_USE_HIER_SCHED
use_hier = pr->flags.use_hier;
#else
use_hier = false;
#endif
schedule = SCHEDULE_WITHOUT_MODIFIERS(schedule);

/* Pick up the nonmonotonic/monotonic bits from the scheduling type */
monotonicity = __kmp_get_monotonicity(schedule, use_hier);
schedule = SCHEDULE_WITHOUT_MODIFIERS(schedule);

/* Pick up the nomerge/ordered bits from the scheduling type */
if ((schedule >= kmp_nm_lower) && (schedule < kmp_nm_upper)) {
Expand All @@ -149,6 +168,10 @@ void __kmp_dispatch_init_algorithm(ident_t *loc, int gtid,
} else {
pr->flags.ordered = FALSE;
}
// Ordered overrides nonmonotonic
if (pr->flags.ordered) {
monotonicity = SCHEDULE_MONOTONIC;
}

if (schedule == kmp_sch_static) {
schedule = __kmp_static;
Expand All @@ -157,6 +180,8 @@ void __kmp_dispatch_init_algorithm(ident_t *loc, int gtid,
// Use the scheduling specified by OMP_SCHEDULE (or __kmp_sch_default if
// not specified)
schedule = team->t.t_sched.r_sched_type;
monotonicity = __kmp_get_monotonicity(schedule, use_hier);
schedule = SCHEDULE_WITHOUT_MODIFIERS(schedule);
// Detail the schedule if needed (global controls are differentiated
// appropriately)
if (schedule == kmp_sch_guided_chunked) {
Expand Down Expand Up @@ -207,7 +232,13 @@ void __kmp_dispatch_init_algorithm(ident_t *loc, int gtid,
}
#endif
}

#if KMP_STATIC_STEAL_ENABLED
// map nonmonotonic:dynamic to static steal
if (schedule == kmp_sch_dynamic_chunked) {
if (monotonicity == SCHEDULE_NONMONOTONIC)
schedule = kmp_sch_static_steal;
}
#endif
/* guided analytical not safe for too many threads */
if (schedule == kmp_sch_guided_analytical_chunked && nproc > 1 << 20) {
schedule = kmp_sch_guided_iterative_chunked;
Expand All @@ -217,6 +248,8 @@ void __kmp_dispatch_init_algorithm(ident_t *loc, int gtid,
if (schedule == kmp_sch_runtime_simd) {
// compiler provides simd_width in the chunk parameter
schedule = team->t.t_sched.r_sched_type;
monotonicity = __kmp_get_monotonicity(schedule, use_hier);
schedule = SCHEDULE_WITHOUT_MODIFIERS(schedule);
// Detail the schedule if needed (global controls are differentiated
// appropriately)
if (schedule == kmp_sch_static || schedule == kmp_sch_auto ||
Expand All @@ -236,9 +269,10 @@ void __kmp_dispatch_init_algorithm(ident_t *loc, int gtid,
{
char *buff;
// create format specifiers before the debug output
buff = __kmp_str_format("__kmp_dispatch_init: T#%%d new: schedule:%%d"
" chunk:%%%s\n",
traits_t<ST>::spec);
buff = __kmp_str_format(
"__kmp_dispatch_init_algorithm: T#%%d new: schedule:%%d"
" chunk:%%%s\n",
traits_t<ST>::spec);
KD_TRACE(10, (buff, gtid, schedule, chunk));
__kmp_str_free(&buff);
}
Expand Down Expand Up @@ -331,7 +365,10 @@ void __kmp_dispatch_init_algorithm(ident_t *loc, int gtid,
pr->u.p.ub = init + small_chunk + (id < extras ? 1 : 0);

pr->u.p.parm2 = lb;
// pr->pfields.parm3 = 0; // it's not used in static_steal
// parm3 is the number of times to attempt stealing which is
// proportional to the number of chunks per thread up until
// the maximum value of nproc.
pr->u.p.parm3 = KMP_MIN(small_chunk + extras, nproc);
pr->u.p.parm4 = (id + 1) % nproc; // remember neighbour tid
pr->u.p.st = st;
if (traits_t<T>::type_size > 4) {
Expand Down Expand Up @@ -1184,7 +1221,7 @@ int __kmp_dispatch_next_algorithm(int gtid,
}
if (!status) { // try to steal
kmp_info_t **other_threads = team->t.t_threads;
int while_limit = nproc; // nproc attempts to find a victim
int while_limit = pr->u.p.parm3;
int while_index = 0;
// TODO: algorithm of searching for a victim
// should be cleaned up and measured
Expand Down Expand Up @@ -1282,7 +1319,7 @@ int __kmp_dispatch_next_algorithm(int gtid,

if (!status) {
kmp_info_t **other_threads = team->t.t_threads;
int while_limit = nproc; // nproc attempts to find a victim
int while_limit = pr->u.p.parm3;
int while_index = 0;

// TODO: algorithm of searching for a victim
Expand Down
20 changes: 9 additions & 11 deletions openmp/runtime/src/kmp_dispatch_hier.h
Expand Up @@ -691,6 +691,7 @@ template <typename T> struct kmp_hier_t {
sizeof(kmp_hier_top_unit_t<T>) * max);
for (int j = 0; j < max; ++j) {
layers[i][j].active = 0;
layers[i][j].hier_pr.flags.use_hier = TRUE;
}
}
valid = true;
Expand Down Expand Up @@ -949,26 +950,23 @@ void __kmp_dispatch_init_hierarchy(ident_t *loc, int n,
active = !team->t.t_serialized;
th->th.th_ident = loc;
num_hw_threads = __kmp_hier_max_units[kmp_hier_layer_e::LAYER_THREAD + 1];
KMP_DEBUG_ASSERT(th->th.th_dispatch ==
&th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);
my_buffer_index = th->th.th_dispatch->th_disp_index;
pr = reinterpret_cast<dispatch_private_info_template<T> *>(
&th->th.th_dispatch
->th_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
sh = reinterpret_cast<dispatch_shared_info_template<T> volatile *>(
&team->t.t_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
if (!active) {
KD_TRACE(10, ("__kmp_dispatch_init_hierarchy: T#%d not active parallel. "
"Using normal dispatch functions.\n",
gtid));
pr = reinterpret_cast<dispatch_private_info_template<T> *>(
th->th.th_dispatch->th_disp_buffer);
KMP_DEBUG_ASSERT(pr);
pr->flags.use_hier = FALSE;
pr->flags.contains_last = FALSE;
return;
}
KMP_DEBUG_ASSERT(th->th.th_dispatch ==
&th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]);

my_buffer_index = th->th.th_dispatch->th_disp_index;
pr = reinterpret_cast<dispatch_private_info_template<T> *>(
&th->th.th_dispatch
->th_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
sh = reinterpret_cast<dispatch_shared_info_template<T> volatile *>(
&team->t.t_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]);
KMP_DEBUG_ASSERT(pr);
KMP_DEBUG_ASSERT(sh);
pr->flags.use_hier = TRUE;
Expand Down
25 changes: 21 additions & 4 deletions openmp/runtime/src/kmp_runtime.cpp
Expand Up @@ -2801,9 +2801,13 @@ int __kmp_get_max_active_levels(int gtid) {
return thread->th.th_current_task->td_icvs.max_active_levels;
}

KMP_BUILD_ASSERT(sizeof(kmp_sched_t) == sizeof(int));
KMP_BUILD_ASSERT(sizeof(enum sched_type) == sizeof(int));

/* Changes def_sched_var ICV values (run-time schedule kind and chunk) */
void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
kmp_info_t *thread;
kmp_sched_t orig_kind;
// kmp_team_t *team;

KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n",
Expand All @@ -2814,6 +2818,9 @@ void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
// Valid parameters should fit in one of two intervals - standard or extended:
// <lower>, <valid>, <upper_std>, <lower_ext>, <valid>, <upper>
// 2008-01-25: 0, 1 - 4, 5, 100, 101 - 102, 103
orig_kind = kind;
kind = __kmp_sched_without_mods(kind);

if (kind <= kmp_sched_lower || kind >= kmp_sched_upper ||
(kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) {
// TODO: Hint needs attention in case we change the default schedule.
Expand Down Expand Up @@ -2844,6 +2851,8 @@ void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) {
__kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std -
kmp_sched_lower - 2];
}
__kmp_sched_apply_mods_intkind(
orig_kind, &(thread->th.th_current_task->td_icvs.sched.r_sched_type));
if (kind == kmp_sched_auto || chunk < 1) {
// ignore parameter chunk for schedule auto
thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK;
Expand All @@ -2863,12 +2872,12 @@ void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
thread = __kmp_threads[gtid];

th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type;

switch (th_type) {
switch (SCHEDULE_WITHOUT_MODIFIERS(th_type)) {
case kmp_sch_static:
case kmp_sch_static_greedy:
case kmp_sch_static_balanced:
*kind = kmp_sched_static;
__kmp_sched_apply_mods_stdkind(kind, th_type);
*chunk = 0; // chunk was not set, try to show this fact via zero value
return;
case kmp_sch_static_chunked:
Expand Down Expand Up @@ -2897,6 +2906,7 @@ void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) {
KMP_FATAL(UnknownSchedulingType, th_type);
}

__kmp_sched_apply_mods_stdkind(kind, th_type);
*chunk = thread->th.th_current_task->td_icvs.sched.chunk;
}

Expand Down Expand Up @@ -3025,15 +3035,22 @@ kmp_r_sched_t __kmp_get_schedule_global() {
// __kmp_guided. __kmp_sched should keep original value, so that user can set
// KMP_SCHEDULE multiple times, and thus have different run-time schedules in
// different roots (even in OMP 2.5)
if (__kmp_sched == kmp_sch_static) {
enum sched_type s = SCHEDULE_WITHOUT_MODIFIERS(__kmp_sched);
#if OMP_45_ENABLED
enum sched_type sched_modifiers = SCHEDULE_GET_MODIFIERS(__kmp_sched);
#endif
if (s == kmp_sch_static) {
// replace STATIC with more detailed schedule (balanced or greedy)
r_sched.r_sched_type = __kmp_static;
} else if (__kmp_sched == kmp_sch_guided_chunked) {
} else if (s == kmp_sch_guided_chunked) {
// replace GUIDED with more detailed schedule (iterative or analytical)
r_sched.r_sched_type = __kmp_guided;
} else { // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other
r_sched.r_sched_type = __kmp_sched;
}
#if OMP_45_ENABLED
SCHEDULE_SET_MODIFIERS(r_sched.r_sched_type, sched_modifiers);
#endif

if (__kmp_chunk < KMP_DEFAULT_CHUNK) {
// __kmp_chunk may be wrong here (if it was not ever set)
Expand Down

0 comments on commit 71abe28

Please sign in to comment.