diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h
index dc759ab1c5277..f95d008f2c6a0 100644
--- a/openmp/runtime/src/kmp.h
+++ b/openmp/runtime/src/kmp.h
@@ -27,6 +27,9 @@
 #ifndef KMP_STATIC_STEAL_ENABLED
 #define KMP_STATIC_STEAL_ENABLED 1
 #endif
+#define KMP_WEIGHTED_ITERATIONS_SUPPORTED                                      \
+  (KMP_AFFINITY_SUPPORTED && KMP_STATIC_STEAL_ENABLED &&                       \
+   (KMP_ARCH_X86 || KMP_ARCH_X86_64))
 
 #define TASK_CURRENT_NOT_QUEUED 0
 #define TASK_CURRENT_QUEUED 1
@@ -881,14 +884,8 @@ typedef struct kmp_affinity_flags_t {
 KMP_BUILD_ASSERT(sizeof(kmp_affinity_flags_t) == 4);
 
 typedef struct kmp_affinity_ids_t {
+  int os_id;
   int ids[KMP_HW_LAST];
-  int operator[](size_t idx) const { return ids[idx]; }
-  int &operator[](size_t idx) { return ids[idx]; }
-  kmp_affinity_ids_t &operator=(const kmp_affinity_ids_t &rhs) {
-    for (int i = 0; i < KMP_HW_LAST; ++i)
-      ids[i] = rhs[i];
-    return *this;
-  }
 } kmp_affinity_ids_t;
 
 typedef struct kmp_affinity_attrs_t {
@@ -938,6 +935,10 @@ extern kmp_affin_mask_t *__kmp_affin_fullMask;
 extern kmp_affin_mask_t *__kmp_affin_origMask;
 extern char *__kmp_cpuinfo_file;
 
+#if KMP_WEIGHTED_ITERATIONS_SUPPORTED
+extern int __kmp_first_osid_with_ecore;
+#endif
+
 #endif /* KMP_AFFINITY_SUPPORTED */
 
 // This needs to be kept in sync with the values in omp.h !!!
@@ -1849,12 +1850,9 @@ typedef struct kmp_sched_flags {
   unsigned ordered : 1;
   unsigned nomerge : 1;
   unsigned contains_last : 1;
-#if KMP_USE_HIER_SCHED
-  unsigned use_hier : 1;
-  unsigned unused : 28;
-#else
-  unsigned unused : 29;
-#endif
+  unsigned use_hier : 1; // Used in KMP_USE_HIER_SCHED code
+  unsigned use_hybrid : 1; // Used in KMP_WEIGHTED_ITERATIONS_SUPPORTED code
+  unsigned unused : 27;
 } kmp_sched_flags_t;
 
 KMP_BUILD_ASSERT(sizeof(kmp_sched_flags_t) == 4);
@@ -1868,26 +1866,37 @@ typedef struct KMP_ALIGN_CACHE dispatch_private_info32 {
   kmp_int32 st;
   kmp_int32 tc;
   kmp_lock_t *steal_lock; // lock used for chunk stealing
+
+  kmp_uint32 ordered_lower;
+  kmp_uint32 ordered_upper;
+
   // KMP_ALIGN(32) ensures (if the KMP_ALIGN macro is turned on)
   // a) parm3 is properly aligned and
   // b) all parm1-4 are on the same cache line.
   // Because of parm1-4 are used together, performance seems to be better
   // if they are on the same cache line (not measured though).
 
-  struct KMP_ALIGN(32) { // AC: changed 16 to 32 in order to simplify template
-    kmp_int32 parm1; //     structures in kmp_dispatch.cpp. This should
-    kmp_int32 parm2; //     make no real change at least while padding is off.
+  struct KMP_ALIGN(32) {
+    kmp_int32 parm1;
+    kmp_int32 parm2;
     kmp_int32 parm3;
     kmp_int32 parm4;
   };
 
-  kmp_uint32 ordered_lower;
-  kmp_uint32 ordered_upper;
+#if KMP_WEIGHTED_ITERATIONS_SUPPORTED
+  kmp_uint32 pchunks;
+  kmp_uint32 num_procs_with_pcore;
+  kmp_int32 first_thread_with_ecore;
+#endif
 #if KMP_OS_WINDOWS
   kmp_int32 last_upper;
 #endif /* KMP_OS_WINDOWS */
 } dispatch_private_info32_t;
 
+#if CACHE_LINE <= 128
+KMP_BUILD_ASSERT(sizeof(dispatch_private_info32_t) <= 128);
+#endif
+
 typedef struct KMP_ALIGN_CACHE dispatch_private_info64 {
   kmp_int64 count; // current chunk number for static & static-steal scheduling
   kmp_int64 ub; /* upper-bound */
@@ -1896,14 +1905,16 @@ typedef struct KMP_ALIGN_CACHE dispatch_private_info64 {
   kmp_int64 st; /* stride */
   kmp_int64 tc; /* trip count (number of iterations) */
   kmp_lock_t *steal_lock; // lock used for chunk stealing
+
+  kmp_uint64 ordered_lower;
+  kmp_uint64 ordered_upper;
 
   /* parm[1-4] are used in different ways by different scheduling algorithms */
-  // KMP_ALIGN( 32 ) ensures ( if the KMP_ALIGN macro is turned on )
+  // KMP_ALIGN(32) ensures ( if the KMP_ALIGN macro is turned on )
   // a) parm3 is properly aligned and
   // b) all parm1-4 are in the same cache line.
   // Because of parm1-4 are used together, performance seems to be better
   // if they are in the same line (not measured though).
-
   struct KMP_ALIGN(32) {
     kmp_int64 parm1;
     kmp_int64 parm2;
@@ -1911,12 +1922,21 @@ typedef struct KMP_ALIGN_CACHE dispatch_private_info64 {
     kmp_int64 parm4;
   };
 
-  kmp_uint64 ordered_lower;
-  kmp_uint64 ordered_upper;
+#if KMP_WEIGHTED_ITERATIONS_SUPPORTED
+  kmp_uint64 pchunks;
+  kmp_uint64 num_procs_with_pcore;
+  kmp_int64 first_thread_with_ecore;
+#endif
+
 #if KMP_OS_WINDOWS
   kmp_int64 last_upper;
 #endif /* KMP_OS_WINDOWS */
 } dispatch_private_info64_t;
+
+#if CACHE_LINE <= 128
+KMP_BUILD_ASSERT(sizeof(dispatch_private_info64_t) <= 128);
+#endif
+
 #else /* KMP_STATIC_STEAL_ENABLED */
 typedef struct KMP_ALIGN_CACHE dispatch_private_info32 {
   kmp_int32 lb;
@@ -3862,6 +3882,9 @@ extern int __kmp_aux_set_affinity_mask_proc(int proc, void **mask);
 extern int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask);
 extern int __kmp_aux_get_affinity_mask_proc(int proc, void **mask);
 extern void __kmp_balanced_affinity(kmp_info_t *th, int team_size);
+#if KMP_WEIGHTED_ITERATIONS_SUPPORTED
+extern int __kmp_get_first_osid_with_ecore(void);
+#endif
 #if KMP_OS_LINUX || KMP_OS_FREEBSD
 extern int kmp_set_thread_affinity_mask_initial(void);
 #endif
diff --git a/openmp/runtime/src/kmp_affinity.cpp b/openmp/runtime/src/kmp_affinity.cpp
index 8c608d78bb56f..7009730a49ba7 100644
--- a/openmp/runtime/src/kmp_affinity.cpp
+++ b/openmp/runtime/src/kmp_affinity.cpp
@@ -4196,7 +4196,7 @@ static void __kmp_affinity_get_mask_topology_info(const kmp_affin_mask_t *mask,
 
   // Initiailze ids and attrs thread data
   for (int i = 0; i < KMP_HW_LAST; ++i)
-    ids[i] = kmp_hw_thread_t::UNKNOWN_ID;
+    ids.ids[i] = kmp_hw_thread_t::UNKNOWN_ID;
   attrs = KMP_AFFINITY_ATTRS_UNKNOWN;
 
   // Iterate through each os id within the mask and determine
@@ -4205,19 +4205,20 @@ static void __kmp_affinity_get_mask_topology_info(const kmp_affin_mask_t *mask,
   int depth = __kmp_topology->get_depth();
   KMP_CPU_SET_ITERATE(cpu, mask) {
     int osid_idx = __kmp_osid_to_hwthread_map[cpu];
+    ids.os_id = cpu;
     const kmp_hw_thread_t &hw_thread = __kmp_topology->at(osid_idx);
     for (int level = 0; level < depth; ++level) {
       kmp_hw_t type = __kmp_topology->get_type(level);
       int id = hw_thread.sub_ids[level];
-      if (ids[type] == kmp_hw_thread_t::UNKNOWN_ID || ids[type] == id) {
-        ids[type] = id;
+      if (ids.ids[type] == kmp_hw_thread_t::UNKNOWN_ID || ids.ids[type] == id) {
+        ids.ids[type] = id;
       } else {
         // This mask spans across multiple topology units, set it as such
         // and mark every level below as such as well.
-        ids[type] = kmp_hw_thread_t::MULTIPLE_ID;
+        ids.ids[type] = kmp_hw_thread_t::MULTIPLE_ID;
         for (; level < depth; ++level) {
           kmp_hw_t type = __kmp_topology->get_type(level);
-          ids[type] = kmp_hw_thread_t::MULTIPLE_ID;
+          ids.ids[type] = kmp_hw_thread_t::MULTIPLE_ID;
         }
       }
     }
@@ -4297,6 +4298,9 @@ static void __kmp_aux_affinity_initialize_other_data(kmp_affinity_t &affinity) {
   if (__kmp_topology && __kmp_topology->get_num_hw_threads()) {
     machine_hierarchy.init(__kmp_topology->get_num_hw_threads());
     __kmp_affinity_get_topology_info(affinity);
+#if KMP_WEIGHTED_ITERATIONS_SUPPORTED
+    __kmp_first_osid_with_ecore = __kmp_get_first_osid_with_ecore();
+#endif
   }
 }
 
@@ -4876,7 +4880,7 @@ void __kmp_affinity_set_init_mask(int gtid, int isa_root) {
 
   // Set the thread topology information to default of unknown
   for (int id = 0; id < KMP_HW_LAST; ++id)
-    th->th.th_topology_ids[id] = kmp_hw_thread_t::UNKNOWN_ID;
+    th->th.th_topology_ids.ids[id] = kmp_hw_thread_t::UNKNOWN_ID;
   th->th.th_topology_attrs = KMP_AFFINITY_ATTRS_UNKNOWN;
 
   if (!KMP_AFFINITY_CAPABLE()) {
@@ -5273,6 +5277,28 @@ int __kmp_aux_get_affinity_mask_proc(int proc, void **mask) {
   return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask));
 }
 
+#if KMP_WEIGHTED_ITERATIONS_SUPPORTED
+// Returns first os proc id with ATOM core
+int __kmp_get_first_osid_with_ecore(void) {
+  int low = 0;
+  int high = __kmp_topology->get_num_hw_threads() - 1;
+  int mid = 0;
+  while (high - low > 1) {
+    mid = (high + low) / 2;
+    if (__kmp_topology->at(mid).attrs.get_core_type() ==
+        KMP_HW_CORE_TYPE_CORE) {
+      low = mid + 1;
+    } else {
+      high = mid;
+    }
+  }
+  if (__kmp_topology->at(mid).attrs.get_core_type() == KMP_HW_CORE_TYPE_ATOM) {
+    return mid;
+  }
+  return -1;
+}
+#endif
+
 // Dynamic affinity settings - Affinity balanced
 void __kmp_balanced_affinity(kmp_info_t *th, int nthreads) {
   KMP_DEBUG_ASSERT(th);
diff --git a/openmp/runtime/src/kmp_dispatch.cpp b/openmp/runtime/src/kmp_dispatch.cpp
index a6ee844e59886..ac85b2b3f2fcd 100644
--- a/openmp/runtime/src/kmp_dispatch.cpp
+++ b/openmp/runtime/src/kmp_dispatch.cpp
@@ -90,6 +90,70 @@ static inline int __kmp_get_monotonicity(ident_t *loc, enum sched_type schedule,
   return monotonicity;
 }
 
+#if KMP_WEIGHTED_ITERATIONS_SUPPORTED
+// Return floating point number rounded to two decimal points
+static inline float __kmp_round_2decimal_val(float num) {
+  return (float)(static_cast<int>(num * 100 + 0.5)) / 100;
+}
+static inline int __kmp_get_round_val(float num) {
+  return static_cast<int>(num < 0 ? num - 0.5 : num + 0.5);
+}
+#endif
+
+template <typename T>
+inline void
+__kmp_initialize_self_buffer(kmp_team_t *team, T id,
+                             dispatch_private_info_template<T> *pr,
+                             typename traits_t<T>::unsigned_t nchunks, T nproc,
+                             typename traits_t<T>::unsigned_t &init,
+                             T &small_chunk, T &extras, T &p_extra) {
+
+#if KMP_WEIGHTED_ITERATIONS_SUPPORTED
+  if (pr->flags.use_hybrid) {
+    kmp_info_t *th = __kmp_threads[__kmp_gtid_from_tid((int)id, team)];
+    kmp_hw_core_type_t type =
+        (kmp_hw_core_type_t)th->th.th_topology_attrs.core_type;
+    T pchunks = pr->u.p.pchunks;
+    T echunks = nchunks - pchunks;
+    T num_procs_with_pcore = pr->u.p.num_procs_with_pcore;
+    T num_procs_with_ecore = nproc - num_procs_with_pcore;
+    T first_thread_with_ecore = pr->u.p.first_thread_with_ecore;
+    T big_chunk =
+        pchunks / num_procs_with_pcore; // chunks per thread with p-core
+    small_chunk =
+        echunks / num_procs_with_ecore; // chunks per thread with e-core
+
+    extras =
+        (pchunks % num_procs_with_pcore) + (echunks % num_procs_with_ecore);
+
+    p_extra = (big_chunk - small_chunk);
+
+    if (type == KMP_HW_CORE_TYPE_CORE) {
+      if (id < first_thread_with_ecore) {
+        init = id * small_chunk + id * p_extra + (id < extras ? id : extras);
+      } else {
+        init = id * small_chunk + (id - num_procs_with_ecore) * p_extra +
+               (id < extras ? id : extras);
+      }
+    } else {
+      if (id == first_thread_with_ecore) {
+        init = id * small_chunk + id * p_extra + (id < extras ? id : extras);
+      } else {
+        init = id * small_chunk + first_thread_with_ecore * p_extra +
+               (id < extras ? id : extras);
+      }
+    }
+    p_extra = (type == KMP_HW_CORE_TYPE_CORE) ? p_extra : 0;
+    return;
+  }
+#endif
+
+  small_chunk = nchunks / nproc; // chunks per thread
+  extras = nchunks % nproc;
+  p_extra = 0;
+  init = id * small_chunk + (id < extras ? id : extras);
+}
+
 #if KMP_STATIC_STEAL_ENABLED
 enum { // values for steal_flag (possible states of private per-loop buffer)
   UNUSED = 0,
@@ -366,7 +430,7 @@ void __kmp_dispatch_init_algorithm(ident_t *loc, int gtid,
   switch (schedule) {
 #if KMP_STATIC_STEAL_ENABLED
   case kmp_sch_static_steal: {
-    T ntc, init;
+    T ntc, init = 0;
 
     KD_TRACE(100,
              ("__kmp_dispatch_init_algorithm: T#%d kmp_sch_static_steal case\n",
@@ -376,7 +440,7 @@ void __kmp_dispatch_init_algorithm(ident_t *loc, int gtid,
     if (nproc > 1 && ntc >= nproc) {
       KMP_COUNT_BLOCK(OMP_LOOP_STATIC_STEAL);
       T id = tid;
      T small_chunk, extras;
-      T small_chunk, extras;
+      T small_chunk, extras, p_extra = 0;
       kmp_uint32 old = UNUSED;
       int claimed = pr->steal_flag.compare_exchange_strong(old, CLAIMED);
       if (traits_t<T>::type_size > 4) {
@@ -388,13 +452,110 @@ void __kmp_dispatch_init_algorithm(ident_t *loc, int gtid,
          pr->u.p.steal_lock = (kmp_lock_t *)__kmp_allocate(sizeof(kmp_lock_t));
          __kmp_init_lock(pr->u.p.steal_lock);
        }
      }
-      small_chunk = ntc / nproc;
-      extras = ntc % nproc;
-      init = id * small_chunk + (id < extras ? id : extras);
+#if KMP_WEIGHTED_ITERATIONS_SUPPORTED
+      // Iterations are divided in a 60/40 skewed distribution among CORE and
+      // ATOM processors for hybrid systems
+      bool use_hybrid = false;
+      kmp_hw_core_type_t core_type = KMP_HW_CORE_TYPE_UNKNOWN;
+      T first_thread_with_ecore = 0;
+      T num_procs_with_pcore = 0;
+      T num_procs_with_ecore = 0;
+      T p_ntc = 0, e_ntc = 0;
+      if (__kmp_is_hybrid_cpu() && __kmp_affinity.type != affinity_none &&
+          __kmp_affinity.type != affinity_explicit) {
+        use_hybrid = true;
+        core_type = (kmp_hw_core_type_t)th->th.th_topology_attrs.core_type;
+        if (core_type != KMP_HW_CORE_TYPE_UNKNOWN &&
+            __kmp_first_osid_with_ecore > -1) {
+          for (int i = 0; i < team->t.t_nproc; ++i) {
+            kmp_hw_core_type_t type = (kmp_hw_core_type_t)team->t.t_threads[i]
+                                          ->th.th_topology_attrs.core_type;
+            int id = team->t.t_threads[i]->th.th_topology_ids.os_id;
+            if (id == __kmp_first_osid_with_ecore) {
+              first_thread_with_ecore =
+                  team->t.t_threads[i]->th.th_info.ds.ds_tid;
+            }
+            if (type == KMP_HW_CORE_TYPE_CORE) {
+              num_procs_with_pcore++;
+            } else if (type == KMP_HW_CORE_TYPE_ATOM) {
+              num_procs_with_ecore++;
+            } else {
+              use_hybrid = false;
+              break;
+            }
+          }
+        }
+        if (num_procs_with_pcore > 0 && num_procs_with_ecore > 0) {
+          float multiplier = 60.0 / 40.0;
+          float p_ratio = (float)num_procs_with_pcore / nproc;
+          float e_ratio = (float)num_procs_with_ecore / nproc;
+          float e_multiplier =
+              (float)1 /
+              (((multiplier * num_procs_with_pcore) / nproc) + e_ratio);
+          float p_multiplier = multiplier * e_multiplier;
+          p_ntc = __kmp_get_round_val(ntc * p_ratio * p_multiplier);
+          if ((int)p_ntc > (int)(ntc * p_ratio * p_multiplier))
+            e_ntc =
+                (int)(__kmp_round_2decimal_val(ntc * e_ratio * e_multiplier));
+          else
+            e_ntc = __kmp_get_round_val(ntc * e_ratio * e_multiplier);
+          KMP_DEBUG_ASSERT(ntc == p_ntc + e_ntc);
+
+          // Use regular static steal if not enough chunks for skewed
+          // distribution
+          use_hybrid = (use_hybrid && (p_ntc >= num_procs_with_pcore &&
+                                       e_ntc >= num_procs_with_ecore)
+                            ? true
+                            : false);
+        } else {
+          use_hybrid = false;
+        }
+      }
+      pr->flags.use_hybrid = use_hybrid;
+      pr->u.p.pchunks = p_ntc;
+      pr->u.p.num_procs_with_pcore = num_procs_with_pcore;
+      pr->u.p.first_thread_with_ecore = first_thread_with_ecore;
+
+      if (use_hybrid) {
+        KMP_DEBUG_ASSERT(nproc == num_procs_with_pcore + num_procs_with_ecore);
+        T big_chunk = p_ntc / num_procs_with_pcore;
+        small_chunk = e_ntc / num_procs_with_ecore;
+
+        extras =
+            (p_ntc % num_procs_with_pcore) + (e_ntc % num_procs_with_ecore);
+
+        p_extra = (big_chunk - small_chunk);
+
+        if (core_type == KMP_HW_CORE_TYPE_CORE) {
+          if (id < first_thread_with_ecore) {
+            init =
+                id * small_chunk + id * p_extra + (id < extras ? id : extras);
+          } else {
+            init = id * small_chunk + (id - num_procs_with_ecore) * p_extra +
+                   (id < extras ? id : extras);
+          }
+        } else {
+          if (id == first_thread_with_ecore) {
+            init =
+                id * small_chunk + id * p_extra + (id < extras ? id : extras);
+          } else {
+            init = id * small_chunk + first_thread_with_ecore * p_extra +
+                   (id < extras ? id : extras);
+          }
+        }
+        p_extra = (core_type == KMP_HW_CORE_TYPE_CORE) ? p_extra : 0;
+      } else
+#endif
+      {
+        small_chunk = ntc / nproc;
+        extras = ntc % nproc;
+        init = id * small_chunk + (id < extras ? id : extras);
+        p_extra = 0;
+      }
       pr->u.p.count = init;
       if (claimed) { // are we succeeded in claiming own buffer?
-        pr->u.p.ub = init + small_chunk + (id < extras ? 1 : 0);
+        pr->u.p.ub = init + small_chunk + p_extra + (id < extras ? 1 : 0);
        // Other threads will inspect steal_flag when searching for a victim.
        // READY means other threads may steal from this thread from now on.
        KMP_ATOMIC_ST_REL(&pr->steal_flag, READY);
@@ -1261,13 +1422,13 @@ int __kmp_dispatch_next_algorithm(int gtid,
         if (status) {
           // initialize self buffer with victim's whole range of chunks
           T id = victimId;
-          T small_chunk, extras;
-          small_chunk = nchunks / nproc; // chunks per thread
-          extras = nchunks % nproc;
-          init = id * small_chunk + (id < extras ? id : extras);
+          T small_chunk = 0, extras = 0, p_extra = 0;
+          __kmp_initialize_self_buffer<T>(team, id, pr, nchunks, nproc,
+                                          init, small_chunk, extras,
+                                          p_extra);
           __kmp_acquire_lock(lck, gtid);
           pr->u.p.count = init + 1; // exclude one we execute immediately
-          pr->u.p.ub = init + small_chunk + (id < extras ? 1 : 0);
+          pr->u.p.ub = init + small_chunk + p_extra + (id < extras ? 1 : 0);
           __kmp_release_lock(lck, gtid);
           pr->u.p.parm4 = (id + 1) % nproc; // remember neighbour tid
           // no need to reinitialize other thread invariants: lb, st, etc.
@@ -1275,10 +1436,10 @@ int __kmp_dispatch_next_algorithm(int gtid,
           {
             char *buff;
             // create format specifiers before the debug output
-            buff = __kmp_str_format(
-                "__kmp_dispatch_next: T#%%d stolen chunks from T#%%d, "
-                "count:%%%s ub:%%%s\n",
-                traits_t<UT>::spec, traits_t<T>::spec);
+            buff = __kmp_str_format("__kmp_dispatch_next_algorithm: T#%%d "
+                                    "stolen chunks from T#%%d, "
+                                    "count:%%%s ub:%%%s\n",
+                                    traits_t<UT>::spec, traits_t<T>::spec);
             KD_TRACE(10, (buff, gtid, id, pr->u.p.count, pr->u.p.ub));
             __kmp_str_free(&buff);
           }
@@ -1404,12 +1565,12 @@ int __kmp_dispatch_next_algorithm(int gtid,
         if (status) {
           // initialize self buffer with victim's whole range of chunks
           T id = victimId;
-          T small_chunk, extras;
-          small_chunk = nchunks / nproc; // chunks per thread
-          extras = nchunks % nproc;
-          init = id * small_chunk + (id < extras ? id : extras);
+          T small_chunk = 0, extras = 0, p_extra = 0;
+          __kmp_initialize_self_buffer<T>(team, id, pr, nchunks, nproc,
+                                          init, small_chunk, extras,
+                                          p_extra);
           vnew.p.count = init + 1;
-          vnew.p.ub = init + small_chunk + (id < extras ? 1 : 0);
+          vnew.p.ub = init + small_chunk + p_extra + (id < extras ? 1 : 0);
           // write pair (count, ub) at once atomically
 #if KMP_ARCH_X86
           KMP_XCHG_FIXED64((volatile kmp_int64 *)(&pr->u.p.count), vnew.b);
@@ -1422,10 +1583,10 @@ int __kmp_dispatch_next_algorithm(int gtid,
           {
             char *buff;
             // create format specifiers before the debug output
-            buff = __kmp_str_format(
-                "__kmp_dispatch_next: T#%%d stolen chunks from T#%%d, "
-                "count:%%%s ub:%%%s\n",
-                traits_t<UT>::spec, traits_t<T>::spec);
+            buff = __kmp_str_format("__kmp_dispatch_next_algorithm: T#%%d "
+                                    "stolen chunks from T#%%d, "
+                                    "count:%%%s ub:%%%s\n",
+                                    traits_t<UT>::spec, traits_t<T>::spec);
             KD_TRACE(10, (buff, gtid, id, pr->u.p.count, pr->u.p.ub));
             __kmp_str_free(&buff);
           }
diff --git a/openmp/runtime/src/kmp_dispatch.h b/openmp/runtime/src/kmp_dispatch.h
index 154db174613db..cf19eb52662ce 100644
--- a/openmp/runtime/src/kmp_dispatch.h
+++ b/openmp/runtime/src/kmp_dispatch.h
@@ -75,14 +75,17 @@ template <typename T> struct dispatch_private_infoXX_template {
   ST st; // signed
   UT tc; // unsigned
   kmp_lock_t *steal_lock; // lock used for chunk stealing
+
+  UT ordered_lower; // unsigned
+  UT ordered_upper; // unsigned
+
   /* parm[1-4] are used in different ways by different scheduling algorithms */
-  // KMP_ALIGN( 32 ) ensures ( if the KMP_ALIGN macro is turned on )
+  // KMP_ALIGN(32) ensures ( if the KMP_ALIGN macro is turned on )
   // a) parm3 is properly aligned and
   // b) all parm1-4 are in the same cache line.
   // Because of parm1-4 are used together, performance seems to be better
   // if they are in the same line (not measured though).
-
   struct KMP_ALIGN(32) { // compiler does not accept sizeof(T)*4
     T parm1;
     T parm2;
@@ -90,8 +93,11 @@ template <typename T> struct dispatch_private_infoXX_template {
     T parm4;
   };
 
-  UT ordered_lower; // unsigned
-  UT ordered_upper; // unsigned
+#if KMP_WEIGHTED_ITERATIONS_SUPPORTED
+  UT pchunks; // total number of chunks for processes with p-core
+  UT num_procs_with_pcore; // number of threads with p-core
+  T first_thread_with_ecore;
+#endif
 #if KMP_OS_WINDOWS
   T last_upper;
 #endif /* KMP_OS_WINDOWS */
diff --git a/openmp/runtime/src/kmp_global.cpp b/openmp/runtime/src/kmp_global.cpp
index 48097fb530d1c..b132f38fd3b08 100644
--- a/openmp/runtime/src/kmp_global.cpp
+++ b/openmp/runtime/src/kmp_global.cpp
@@ -282,6 +282,9 @@ kmp_affinity_t __kmp_hh_affinity =
 kmp_affinity_t *__kmp_affinities[] = {&__kmp_affinity, &__kmp_hh_affinity};
 
 char *__kmp_cpuinfo_file = NULL;
+#if KMP_WEIGHTED_ITERATIONS_SUPPORTED
+int __kmp_first_osid_with_ecore = -1;
+#endif
 
 #endif /* KMP_AFFINITY_SUPPORTED */
 
diff --git a/openmp/runtime/test/worksharing/for/omp_for_schedule_dynamic.c b/openmp/runtime/test/worksharing/for/omp_for_schedule_dynamic.c
index 4433d2a3dafbe..419187321d28d 100644
--- a/openmp/runtime/test/worksharing/for/omp_for_schedule_dynamic.c
+++ b/openmp/runtime/test/worksharing/for/omp_for_schedule_dynamic.c
@@ -1,4 +1,5 @@
 // RUN: %libomp-compile-and-run
+// RUN: env KMP_AFFINITY=compact,0 %libomp-run
 /*
  * Test for dynamic scheduling with chunk size
  * Method: calculate how many times the iteration space is dispatched
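
Note (illustrative sketch, not part of the patch): the hybrid path in __kmp_dispatch_init_algorithm splits the nominal chunk count with a 60/40 per-core weighting between P-core and E-core threads before the usual static-steal bookkeeping. The standalone program below mirrors that arithmetic for a made-up team of 8 P-core and 8 E-core threads and 64 chunks; the helper name round_val and the simplification e_ntc = ntc - p_ntc (instead of the paired rounding plus KMP_DEBUG_ASSERT the patch performs) are mine.

// Illustrative sketch only. Inputs are hypothetical; names are not the
// runtime's own except where noted in the comments.
#include <cstdio>

// Same rounding shape as __kmp_get_round_val() in the patch.
static int round_val(float num) {
  return static_cast<int>(num < 0 ? num - 0.5 : num + 0.5);
}

int main() {
  int nproc = 16, num_pcore = 8, num_ecore = 8; // hypothetical hybrid team
  int ntc = 64;                                 // total number of chunks

  // Weight each P-core thread 60/40 = 1.5x an E-core thread, then normalize
  // so the two shares still sum to ntc.
  float multiplier = 60.0f / 40.0f;
  float p_ratio = (float)num_pcore / nproc;
  float e_ratio = (float)num_ecore / nproc;
  float e_mult = 1.0f / ((multiplier * num_pcore) / nproc + e_ratio);
  float p_mult = multiplier * e_mult;
  int p_ntc = round_val(ntc * p_ratio * p_mult); // chunks assigned to P-cores
  int e_ntc = ntc - p_ntc;                       // chunks assigned to E-cores

  // Per-thread shares: every thread gets small_chunk, P-core threads get
  // p_extra more, and the first `extras` threads absorb one leftover each.
  int big_chunk = p_ntc / num_pcore;
  int small_chunk = e_ntc / num_ecore;
  int extras = (p_ntc % num_pcore) + (e_ntc % num_ecore);
  int p_extra = big_chunk - small_chunk;

  printf("p_ntc=%d e_ntc=%d big_chunk=%d small_chunk=%d p_extra=%d extras=%d\n",
         p_ntc, e_ntc, big_chunk, small_chunk, p_extra, extras);
  return 0;
}

With these inputs it prints p_ntc=38 e_ntc=26 big_chunk=4 small_chunk=3 p_extra=1 extras=8, i.e. each P-core thread owns one chunk more than an E-core thread before the leftover chunks are spread over the lowest-numbered threads, which matches the init/ub formulas used in the patch.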
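
Note (illustrative sketch, not part of the patch): __kmp_get_first_osid_with_ecore works because the flattened hardware-thread list keeps every P-core entry before every E-core entry, so the P/E boundary can be located by binary search and a single index (-1 when no E-core is present) is enough for the dispatcher. The sketch below shows the same boundary-search idea over a plain array; it is a lower_bound-style variant, not a copy of the loop in kmp_affinity.cpp.

// Illustrative sketch only; CoreType and first_ecore_index are hypothetical
// names, not runtime identifiers.
#include <cstdio>
#include <vector>

enum CoreType { PCORE, ECORE };

// Returns the index of the first ECORE entry, or -1 if there is none.
// Assumes all PCORE entries precede all ECORE entries.
static int first_ecore_index(const std::vector<CoreType> &cores) {
  int low = 0, high = (int)cores.size(); // search window [low, high)
  while (low < high) {
    int mid = (low + high) / 2;
    if (cores[mid] == PCORE)
      low = mid + 1; // boundary is to the right of mid
    else
      high = mid; // boundary is at mid or to its left
  }
  return low < (int)cores.size() ? low : -1;
}

int main() {
  std::vector<CoreType> cores = {PCORE, PCORE, PCORE, PCORE, ECORE, ECORE};
  printf("first E-core hw thread index: %d\n", first_ecore_index(cores));
  return 0;
}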