Skip to content

Commit

Permalink
Improvements to machine_hierarchy code for re-sizing
Browse files Browse the repository at this point in the history
These changes include:
 1) Machine hierarchy now uses the base_num_threads field to indicate the 
    maximum number of threads the current hierarchy can handle without a resize.
 2) In __kmp_get_hierarchy, depth is now read after any potential resize is
    done, because a resize can change the hierarchy's depth.
 3) Cleanup of the hierarchy resize code to support item 1 above.

Differential Revision: http://reviews.llvm.org/D14455

llvm-svn: 252475
  • Loading branch information
jpeyton52 committed Nov 9, 2015
1 parent e961432 commit 7dee82e
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 36 deletions.
7 changes: 4 additions & 3 deletions openmp/runtime/src/kmp_affinity.cpp
Expand Up @@ -33,12 +33,13 @@ void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
if (TCR_1(machine_hierarchy.uninitialized))
machine_hierarchy.init(NULL, nproc);

depth = machine_hierarchy.depth;
KMP_DEBUG_ASSERT(depth > 0);
// Adjust the hierarchy in case num threads exceeds original
if (nproc > machine_hierarchy.skipPerLevel[depth-1])
if (nproc > machine_hierarchy.base_num_threads)
machine_hierarchy.resize(nproc);

depth = machine_hierarchy.depth;
KMP_DEBUG_ASSERT(depth > 0);

thr_bar->depth = depth;
thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0]-1;
thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
Expand Down
69 changes: 36 additions & 33 deletions openmp/runtime/src/kmp_affinity.h
Expand Up @@ -237,54 +237,57 @@ class hierarchy_info {
void resize(kmp_uint32 nproc)
{
kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
if (bool_result == 0) { // Someone else is resizing
while (TCR_1(resizing) != 0) KMP_CPU_PAUSE();
return;
while (bool_result == 0) { // someone else is trying to resize
KMP_CPU_PAUSE();
if (nproc <= base_num_threads) // happy with other thread's resize
return;
else // try to resize
bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
}
KMP_DEBUG_ASSERT(bool_result!=0);
KMP_DEBUG_ASSERT(nproc > base_num_threads);
if (nproc <= base_num_threads) return; // happy with other thread's resize

// Calculate new maxLevels
kmp_uint32 old_sz = skipPerLevel[depth-1];
kmp_uint32 incs = 0, old_maxLevels = maxLevels;
// First see if old maxLevels is enough to contain new size
// First see if old maxLevels is enough to contain new size
for (kmp_uint32 i=depth; i<maxLevels && nproc>old_sz; ++i) {
skipPerLevel[i] = 2*skipPerLevel[i-1];
numPerLevel[i-1] *= 2;
old_sz *= 2;
depth++;
}
if (nproc <= old_sz) // enough space already
return;
// Not enough space, need to expand hierarchy
while (nproc > old_sz) {
old_sz *=2;
incs++;
depth++;
}
maxLevels += incs;

// Resize arrays
kmp_uint32 *old_numPerLevel = numPerLevel;
kmp_uint32 *old_skipPerLevel = skipPerLevel;
numPerLevel = skipPerLevel = NULL;
numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
skipPerLevel = &(numPerLevel[maxLevels]);
if (nproc > old_sz) { // Not enough space, need to expand hierarchy
while (nproc > old_sz) {
old_sz *=2;
incs++;
depth++;
}
maxLevels += incs;

// Resize arrays
kmp_uint32 *old_numPerLevel = numPerLevel;
kmp_uint32 *old_skipPerLevel = skipPerLevel;
numPerLevel = skipPerLevel = NULL;
numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
skipPerLevel = &(numPerLevel[maxLevels]);

// Copy old elements from old arrays
for (kmp_uint32 i=0; i<old_maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
numPerLevel[i] = old_numPerLevel[i];
skipPerLevel[i] = old_skipPerLevel[i];
}

// Copy old elements from old arrays
for (kmp_uint32 i=0; i<old_maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
numPerLevel[i] = old_numPerLevel[i];
skipPerLevel[i] = old_skipPerLevel[i];
}
// Init new elements in arrays to 1
for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
numPerLevel[i] = 1;
skipPerLevel[i] = 1;
}

// Init new elements in arrays to 1
for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
numPerLevel[i] = 1;
skipPerLevel[i] = 1;
// Free old arrays
__kmp_free(old_numPerLevel);
}

// Free old arrays
__kmp_free(old_numPerLevel);

// Fill in oversubscription levels of hierarchy
for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i)
skipPerLevel[i] = 2*skipPerLevel[i-1];
Expand Down

0 comments on commit 7dee82e

Please sign in to comment.