Skip to content

Commit

Permalink
Adding Hwloc library option for affinity mechanism
Browse files Browse the repository at this point in the history
These changes allow libhwloc to be used as the topology discovery/affinity
mechanism for libomp.  It is supported on Unices. The code additions:
* Canonicalize KMP_CPU_* interface macros so bitmask operations are
  implementation independent and work with both hwloc bitmaps and libomp
  bitmaps.  So there are new KMP_CPU_ALLOC_* and KMP_CPU_ITERATE() macros and
  the like. These are all in kmp.h and appropriately placed.
* Hwloc topology discovery code in kmp_affinity.cpp. This uses the hwloc
  interface to create a libomp address2os object which the rest of libomp knows
  how to handle already.
* To build, use -DLIBOMP_USE_HWLOC=on and
  -DLIBOMP_HWLOC_INSTALL_DIR=/path/to/install/dir [default /usr/local]. If CMake
  can't find the library or hwloc.h, then it will tell you and exit.

Differential Revision: http://reviews.llvm.org/D13991

llvm-svn: 254320
  • Loading branch information
jpeyton52 committed Nov 30, 2015
1 parent 7a09659 commit 01dcf36
Show file tree
Hide file tree
Showing 18 changed files with 789 additions and 138 deletions.
12 changes: 12 additions & 0 deletions openmp/runtime/Build_With_CMake.txt
Expand Up @@ -159,6 +159,18 @@ Should include stats-gathering code be included in the build?
-DLIBOMP_USE_DEBUGGER=off|on
Should the friendly debugger interface be included in the build?

-DLIBOMP_USE_HWLOC=off|on
Should the Hwloc library be used for affinity?
This option is not supported on Windows.
http://www.open-mpi.org/projects/hwloc

-DLIBOMP_HWLOC_INSTALL_DIR=/path/to/hwloc/install/dir
Default: /usr/local
This option is only used if LIBOMP_USE_HWLOC is on.
Specifies install location of Hwloc. The configuration system will look for
hwloc.h in ${LIBOMP_HWLOC_INSTALL_DIR}/include and the library in
${LIBOMP_HWLOC_INSTALL_DIR}/lib.

================================
How to append flags to the build
================================
Expand Down
12 changes: 12 additions & 0 deletions openmp/runtime/CMakeLists.txt
Expand Up @@ -135,6 +135,12 @@ set(LIBOMP_FFLAGS "" CACHE STRING
set(LIBOMP_COPY_EXPORTS TRUE CACHE STRING
"Should exports be copied into source exports/ directory?")

# Hwloc support: user-configurable cache entries.
#   LIBOMP_USE_HWLOC         - BOOL, defaults to FALSE.
#   LIBOMP_HWLOC_INSTALL_DIR - PATH, defaults to /usr/local.
set(LIBOMP_USE_HWLOC
  FALSE
  CACHE BOOL
  "Use Hwloc (http://www.open-mpi.org/projects/hwloc/) library for affinity?"
)
set(LIBOMP_HWLOC_INSTALL_DIR
  "/usr/local"
  CACHE PATH
  "Install path for hwloc library"
)

# Get the build number from kmp_version.c
libomp_get_build_number("${CMAKE_CURRENT_SOURCE_DIR}" LIBOMP_VERSION_BUILD)
math(EXPR LIBOMP_VERSION_BUILD_YEAR "${LIBOMP_VERSION_BUILD}/10000")
Expand Down Expand Up @@ -285,6 +291,11 @@ if(LIBOMP_OMPT_SUPPORT AND (NOT LIBOMP_HAVE_OMPT_SUPPORT))
libomp_error_say("OpenMP Tools Interface requested but not available")
endif()

# config-ix has run by this point, so LIBOMP_HAVE_HWLOC reflects what was
# detected. Report an error when Hwloc was requested but is not usable.
if(LIBOMP_USE_HWLOC AND NOT LIBOMP_HAVE_HWLOC)
  libomp_error_say("Hwloc requested but not available")
endif()

# Setting final library name
set(LIBOMP_DEFAULT_LIB_NAME libomp)
if(${PROFILE_LIBRARY})
Expand Down Expand Up @@ -323,6 +334,7 @@ if(${LIBOMP_STANDALONE_BUILD})
endif()
libomp_say("Use Adaptive locks -- ${LIBOMP_USE_ADAPTIVE_LOCKS}")
libomp_say("Use quad precision -- ${LIBOMP_USE_QUAD_PRECISION}")
libomp_say("Use Hwloc library -- ${LIBOMP_USE_HWLOC}")
endif()

add_subdirectory(src)
Expand Down
1 change: 1 addition & 0 deletions openmp/runtime/cmake/LibompHandleFlags.cmake
Expand Up @@ -151,6 +151,7 @@ endfunction()
function(libomp_get_libflags libflags)
set(libflags_local)
libomp_append(libflags_local "${CMAKE_THREAD_LIBS_INIT}")
libomp_append(libflags_local "${LIBOMP_HWLOC_LIBRARY}" LIBOMP_USE_HWLOC)
if(${IA32})
libomp_append(libflags_local -lirc_pic LIBOMP_HAVE_IRC_PIC_LIBRARY)
endif()
Expand Down
6 changes: 6 additions & 0 deletions openmp/runtime/cmake/LibompMicroTests.cmake
Expand Up @@ -82,10 +82,13 @@ else() # (Unix based systems, Intel(R) MIC Architecture, and Mac)
libomp_append(libomp_test_touch_cflags -m32 LIBOMP_HAVE_M32_FLAG)
endif()
libomp_append(libomp_test_touch_libs ${LIBOMP_OUTPUT_DIRECTORY}/${LIBOMP_LIB_FILE})
libomp_append(libomp_test_touch_libs "${LIBOMP_HWLOC_LIBRARY}" LIBOMP_USE_HWLOC)
if(APPLE)
set(libomp_test_touch_env "DYLD_LIBRARY_PATH=.:${LIBOMP_OUTPUT_DIRECTORY}:$ENV{DYLD_LIBRARY_PATH}")
libomp_append(libomp_test_touch_ldflags "-Wl,-rpath,${LIBOMP_HWLOC_LIBRARY_DIR}" LIBOMP_USE_HWLOC)
else()
set(libomp_test_touch_env "LD_LIBRARY_PATH=.:${LIBOMP_OUTPUT_DIRECTORY}:$ENV{LD_LIBRARY_PATH}")
libomp_append(libomp_test_touch_ldflags "-Wl,-rpath=${LIBOMP_HWLOC_LIBRARY_DIR}" LIBOMP_USE_HWLOC)
endif()
endif()
macro(libomp_test_touch_recipe test_touch_dir)
Expand Down Expand Up @@ -169,8 +172,10 @@ add_custom_target(libomp-test-deps DEPENDS test-deps/.success)
set(libomp_expected_library_deps)
if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
set(libomp_expected_library_deps libc.so.7 libthr.so.3)
libomp_append(libomp_expected_library_deps libhwloc.so.5 LIBOMP_USE_HWLOC)
elseif(CMAKE_SYSTEM_NAME MATCHES "NetBSD")
set(libomp_expected_library_deps libc.so.12 libpthread.so.1 libm.so.0)
libomp_append(libomp_expected_library_deps libhwloc.so.5 LIBOMP_USE_HWLOC)
elseif(APPLE)
set(libomp_expected_library_deps /usr/lib/libSystem.B.dylib)
elseif(WIN32)
Expand Down Expand Up @@ -203,6 +208,7 @@ else()
libomp_append(libomp_expected_library_deps ld64.so.1)
endif()
libomp_append(libomp_expected_library_deps libpthread.so.0 IF_FALSE STUBS_LIBRARY)
libomp_append(libomp_expected_library_deps libhwloc.so.5 LIBOMP_USE_HWLOC)
endif()
libomp_append(libomp_expected_library_deps libstdc++.so.6 LIBOMP_USE_STDCPPLIB)
endif()
Expand Down
23 changes: 23 additions & 0 deletions openmp/runtime/cmake/config-ix.cmake
Expand Up @@ -12,6 +12,7 @@
include(CheckCCompilerFlag)
include(CheckCSourceCompiles)
include(CheckCXXCompilerFlag)
include(CheckIncludeFile)
include(CheckLibraryExists)
include(CheckIncludeFiles)
include(LibompCheckLinkerFlag)
Expand Down Expand Up @@ -211,3 +212,25 @@ else()
endif()
endif()

# Check if HWLOC support is available.
#
# Runs only when the user requested Hwloc (LIBOMP_USE_HWLOC). Results:
#   LIBOMP_HAVE_HWLOC        - TRUE when hwloc.h, the hwloc_topology_init
#                              symbol and the library file were all found;
#                              FALSE otherwise (always FALSE on Windows).
#   LIBOMP_HWLOC_LIBRARY     - find_library() result for hwloc.
#   LIBOMP_HWLOC_LIBRARY_DIR - directory part of LIBOMP_HWLOC_LIBRARY.
if(LIBOMP_USE_HWLOC)
  if(WIN32)
    set(LIBOMP_HAVE_HWLOC FALSE)
    libomp_say("Using hwloc not supported on Windows yet")
  else()
    # Point the header probe at the hwloc install tree, then restore
    # CMAKE_REQUIRED_INCLUDES to whatever it held before so that later checks
    # (in this file or in an enclosing project) keep their include paths.
    # Unquoted expansion is deliberate: an unset variable stays unset.
    set(libomp_saved_required_includes ${CMAKE_REQUIRED_INCLUDES})
    set(CMAKE_REQUIRED_INCLUDES "${LIBOMP_HWLOC_INSTALL_DIR}/include")
    check_include_file(hwloc.h LIBOMP_HAVE_HWLOC_H)
    set(CMAKE_REQUIRED_INCLUDES ${libomp_saved_required_includes})
    unset(libomp_saved_required_includes)

    check_library_exists(hwloc hwloc_topology_init
      "${LIBOMP_HWLOC_INSTALL_DIR}/lib" LIBOMP_HAVE_LIBHWLOC)

    # HINTS are searched before the system locations, so the hwloc under
    # LIBOMP_HWLOC_INSTALL_DIR wins over a system-wide copy and matches the
    # header found above. System locations are still searched as a fallback.
    find_library(LIBOMP_HWLOC_LIBRARY
      NAMES hwloc
      HINTS "${LIBOMP_HWLOC_INSTALL_DIR}/lib")
    get_filename_component(LIBOMP_HWLOC_LIBRARY_DIR "${LIBOMP_HWLOC_LIBRARY}" PATH)

    if(LIBOMP_HAVE_HWLOC_H AND LIBOMP_HAVE_LIBHWLOC AND LIBOMP_HWLOC_LIBRARY)
      set(LIBOMP_HAVE_HWLOC TRUE)
    else()
      set(LIBOMP_HAVE_HWLOC FALSE)
      libomp_say("Could not find hwloc")
    endif()
  endif()
endif()

3 changes: 3 additions & 0 deletions openmp/runtime/src/CMakeLists.txt
Expand Up @@ -42,6 +42,9 @@ include_directories(
${LIBOMP_INC_DIR}
${LIBOMP_SRC_DIR}/thirdparty/ittnotify
)
# Make hwloc.h visible to the runtime sources when Hwloc is enabled.
# The variable is tested by name (not by ${} expansion) so this condition
# evaluates the same way as the other LIBOMP_USE_HWLOC checks in the build.
if(LIBOMP_USE_HWLOC)
  include_directories("${LIBOMP_HWLOC_INSTALL_DIR}/include")
endif()

# Getting correct source files to build library
set(LIBOMP_CFILES)
Expand Down
3 changes: 3 additions & 0 deletions openmp/runtime/src/i18n/en_US.txt
Expand Up @@ -405,6 +405,9 @@ AffGranTopGroup "%1$s: granularity=%2$s is not supported with KMP_T
AffGranGroupType "%1$s: granularity=group is not supported with KMP_AFFINITY=%2$s. Using \"granularity=core\"."
AffThrPlaceManySockets "KMP_PLACE_THREADS ignored: too many sockets requested."
AffThrPlaceDeprecated "KMP_PLACE_THREADS \"o\" offset designator deprecated, please use @ prefix for offset value."
AffUsingHwloc "%1$s: Affinity capable, using hwloc."
AffIgnoringHwloc "%1$s: Ignoring hwloc mechanism."
AffHwlocErrorOccurred "%1$s: Hwloc failed in %2$s. Relying on internal affinity mechanisms."


# --------------------------------------------------------------------------------------------------
Expand Down
101 changes: 97 additions & 4 deletions openmp/runtime/src/kmp.h
Expand Up @@ -77,10 +77,18 @@

#include "kmp_os.h"

#include "kmp_safe_c_api.h"

#if KMP_STATS_ENABLED
class kmp_stats_list;
#endif

#if KMP_USE_HWLOC
// Hwloc-backed affinity: pull in the hwloc API and declare the runtime's
// hwloc state. Both objects are only declared here (extern); their
// definitions are not part of this header.
#include "hwloc.h"
// hwloc topology handle used by the runtime.
extern hwloc_topology_t __kmp_hwloc_topology;
// NOTE(review): presumably set when an hwloc call fails -- confirm against
// the definition and its users.
extern int __kmp_hwloc_error;
#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#include <xmmintrin.h>
#endif
Expand Down Expand Up @@ -488,6 +496,78 @@ extern size_t __kmp_affin_mask_size;
# define KMP_AFFINITY_ENABLE(mask_size) (__kmp_affin_mask_size = mask_size)
# define KMP_CPU_SETSIZE (__kmp_affin_mask_size * CHAR_BIT)

#if KMP_USE_HWLOC

//
// With hwloc, kmp_affin_mask_t is an hwloc bitmap handle and each basic
// KMP_CPU_* operation forwards to the matching hwloc_bitmap_* call.
// Every macro argument is parenthesized so that an expression argument
// (e.g. "base + k") is cast as a whole rather than only its first operand.
//
typedef hwloc_cpuset_t kmp_affin_mask_t;
# define KMP_CPU_SET(i,mask)       hwloc_bitmap_set((hwloc_cpuset_t)(mask), (unsigned)(i))
# define KMP_CPU_ISSET(i,mask)     hwloc_bitmap_isset((hwloc_cpuset_t)(mask), (unsigned)(i))
# define KMP_CPU_CLR(i,mask)       hwloc_bitmap_clr((hwloc_cpuset_t)(mask), (unsigned)(i))
# define KMP_CPU_ZERO(mask)        hwloc_bitmap_zero((hwloc_cpuset_t)(mask))
# define KMP_CPU_COPY(dest, src)   hwloc_bitmap_copy((hwloc_cpuset_t)(dest), (hwloc_cpuset_t)(src))
//
// Flip every bit of an hwloc mask in the index range [0, max_bit_number].
// Bits above max_bit_number are left untouched.
// The loop counter is named __i so it cannot shadow a caller variable named
// "i" that appears inside the macro arguments.
//
# define KMP_CPU_COMPLEMENT(max_bit_number, mask) \
    { \
        unsigned __i; \
        for(__i=0;__i<(unsigned)(max_bit_number)+1;__i++) { \
            if(hwloc_bitmap_isset((hwloc_cpuset_t)(mask), __i)) { \
                hwloc_bitmap_clr((hwloc_cpuset_t)(mask), __i); \
            } else { \
                hwloc_bitmap_set((hwloc_cpuset_t)(mask), __i); \
            } \
        } \
    }

// dest |= src, using hwloc_bitmap_or with dest as both result and first operand.
# define KMP_CPU_UNION(dest, src) hwloc_bitmap_or((hwloc_cpuset_t)(dest), (hwloc_cpuset_t)(dest), (hwloc_cpuset_t)(src))
// Loop header: i starts at hwloc_bitmap_first(mask) and advances with
// hwloc_bitmap_next(mask, i); the loop stops once i compares equal to -1.
// Arguments are parenthesized so expression arguments are cast as a whole.
# define KMP_CPU_SET_ITERATE(i,mask) \
    for((i) = hwloc_bitmap_first((hwloc_cpuset_t)(mask)); (int)(i) != -1; (i) = hwloc_bitmap_next((hwloc_cpuset_t)(mask), (i)))

//
// Single-mask allocation with hwloc: every flavour (regular, "on stack",
// internal) is backed by hwloc_bitmap_alloc() / hwloc_bitmap_free().
//
// KMP_CPU_ALLOC is a parenthesized assignment expression and KMP_CPU_FREE has
// no trailing ';', matching the non-hwloc definitions, so both behave as
// ordinary expressions (e.g. inside an unbraced if/else).
//
# define KMP_CPU_ALLOC(ptr)                ((ptr) = (kmp_affin_mask_t*)hwloc_bitmap_alloc())
# define KMP_CPU_FREE(ptr)                 hwloc_bitmap_free((hwloc_bitmap_t)(ptr))
# define KMP_CPU_ALLOC_ON_STACK(ptr)       KMP_CPU_ALLOC(ptr)
// Keeps its own terminating ';': the non-hwloc variant of this macro expands
// to nothing, so call sites are not guaranteed to supply one.
# define KMP_CPU_FREE_FROM_STACK(ptr)      KMP_CPU_FREE(ptr);
# define KMP_CPU_INTERNAL_ALLOC(ptr)       KMP_CPU_ALLOC(ptr)
# define KMP_CPU_INTERNAL_FREE(ptr)        KMP_CPU_FREE(ptr)

//
// The following macro should be used to index an array of masks.
// With hwloc, kmp_affin_mask_t is an hwloc bitmap handle, so an array of
// masks is declared as "kmp_affin_mask_t *" and holds n handles; indexing is
// plain subscripting. Each element is created with hwloc_bitmap_alloc() and
// released with hwloc_bitmap_free() by the *_ARRAY macros below.
//
// The macros parenthesize their arguments (so "n" may be an expression such
// as "a + b") and use a loop counter named __i so that it cannot shadow a
// caller variable named "i" appearing in the arguments.
//
# define KMP_CPU_INDEX(array,i) ((kmp_affin_mask_t*)((array)[i]))
# define KMP_CPU_ALLOC_ARRAY(arr, n) { \
        unsigned __i; \
        (arr) = (kmp_affin_mask_t *)__kmp_allocate((n)*sizeof(kmp_affin_mask_t)); \
        for(__i=0;__i<(unsigned)(n);__i++) { \
            (arr)[__i] = hwloc_bitmap_alloc(); \
        } \
    }
# define KMP_CPU_FREE_ARRAY(arr, n) { \
        unsigned __i; \
        for(__i=0;__i<(unsigned)(n);__i++) { \
            hwloc_bitmap_free((arr)[__i]); \
        } \
        __kmp_free(arr); \
    }
# define KMP_CPU_INTERNAL_ALLOC_ARRAY(arr, n) { \
        unsigned __i; \
        (arr) = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC((n)*sizeof(kmp_affin_mask_t)); \
        for(__i=0;__i<(unsigned)(n);__i++) { \
            (arr)[__i] = hwloc_bitmap_alloc(); \
        } \
    }
# define KMP_CPU_INTERNAL_FREE_ARRAY(arr, n) { \
        unsigned __i; \
        for(__i=0;__i<(unsigned)(n);__i++) { \
            hwloc_bitmap_free((arr)[__i]); \
        } \
        KMP_INTERNAL_FREE(arr); \
    }

#else /* KMP_USE_HWLOC */
// Native (non-hwloc) masks: loop header that walks every bit index in
// [0, KMP_CPU_SETSIZE). The mask argument is not consulted here, so the loop
// body is entered for clear bits as well as set ones.
# define KMP_CPU_SET_ITERATE(i,mask) \
    for((i) = 0; (size_t)(i) < KMP_CPU_SETSIZE; ++(i))

# if KMP_OS_LINUX
//
// On Linux* OS, the mask is actually a vector of length __kmp_affin_mask_size
Expand Down Expand Up @@ -526,7 +606,7 @@ typedef unsigned char kmp_affin_mask_t;
} \
}

# define KMP_CPU_COMPLEMENT(mask) \
# define KMP_CPU_COMPLEMENT(max_bit_number, mask) \
{ \
size_t __i; \
for (__i = 0; __i < __kmp_affin_mask_size; __i++) { \
Expand Down Expand Up @@ -605,7 +685,7 @@ extern int __kmp_num_proc_groups;
} \
}

# define KMP_CPU_COMPLEMENT(mask) \
# define KMP_CPU_COMPLEMENT(max_bit_number, mask) \
{ \
int __i; \
for (__i = 0; __i < __kmp_num_proc_groups; __i++) { \
Expand Down Expand Up @@ -637,7 +717,7 @@ extern kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity;

extern int __kmp_get_proc_group(kmp_affin_mask_t const *mask);

# else
# else /* KMP_GROUP_AFFINITY */

typedef DWORD kmp_affin_mask_t; /* for compatibility with older winbase.h */

Expand All @@ -646,7 +726,7 @@ typedef DWORD kmp_affin_mask_t; /* for compatibility with older winbase.h */
# define KMP_CPU_CLR(i,mask) (*(mask) &= ~(((kmp_affin_mask_t)1) << (i)))
# define KMP_CPU_ZERO(mask) (*(mask) = 0)
# define KMP_CPU_COPY(dest, src) (*(dest) = *(src))
# define KMP_CPU_COMPLEMENT(mask) (*(mask) = ~*(mask))
# define KMP_CPU_COMPLEMENT(max_bit_number, mask) (*(mask) = ~*(mask))
# define KMP_CPU_UNION(dest, src) (*(dest) |= *(src))

# endif /* KMP_GROUP_AFFINITY */
Expand All @@ -660,6 +740,10 @@ typedef DWORD kmp_affin_mask_t; /* for compatibility with older winbase.h */
//
// Single-mask allocation for native masks. Each allocation requests
// __kmp_affin_mask_size from the respective allocator:
//   KMP_CPU_ALLOC / KMP_CPU_FREE                     - __kmp_allocate / __kmp_free
//   KMP_CPU_ALLOC_ON_STACK / KMP_CPU_FREE_FROM_STACK - KMP_ALLOCA / no-op
//   KMP_CPU_INTERNAL_ALLOC / KMP_CPU_INTERNAL_FREE   - KMP_INTERNAL_MALLOC / KMP_INTERNAL_FREE
//
# define KMP_CPU_ALLOC(ptr) \
        ((ptr) = (kmp_affin_mask_t *)__kmp_allocate(__kmp_affin_mask_size))
# define KMP_CPU_FREE(ptr) \
        __kmp_free(ptr)
# define KMP_CPU_ALLOC_ON_STACK(ptr) \
        ((ptr) = (kmp_affin_mask_t *)KMP_ALLOCA(__kmp_affin_mask_size))
# define KMP_CPU_FREE_FROM_STACK(ptr) /* Nothing */
# define KMP_CPU_INTERNAL_ALLOC(ptr) \
        ((ptr) = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(__kmp_affin_mask_size))
# define KMP_CPU_INTERNAL_FREE(ptr) \
        KMP_INTERNAL_FREE(ptr)

//
// The following macro should be used to index an array of masks.
Expand All @@ -670,6 +754,12 @@ typedef DWORD kmp_affin_mask_t; /* for compatibility with older winbase.h */
//
# define KMP_CPU_INDEX(array,i) \
((kmp_affin_mask_t *)(((char *)(array)) + (i) * __kmp_affin_mask_size))
//
// Array-of-masks allocation for native masks: one contiguous buffer of
// n * __kmp_affin_mask_size, addressed through KMP_CPU_INDEX.
// "n" is parenthesized so that an expression argument such as "a + b" is
// multiplied as a whole.
// NOTE(review): the FREE variants keep their terminating ';'. The hwloc
// variants of these macros are brace blocks, so call sites may be written
// without a ';' -- confirm against all callers before removing it.
//
# define KMP_CPU_ALLOC_ARRAY(arr, n) (arr) = (kmp_affin_mask_t *)__kmp_allocate((n) * __kmp_affin_mask_size)
# define KMP_CPU_FREE_ARRAY(arr, n) __kmp_free(arr);
# define KMP_CPU_INTERNAL_ALLOC_ARRAY(arr, n) (arr) = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC((n) * __kmp_affin_mask_size)
# define KMP_CPU_INTERNAL_FREE_ARRAY(arr, n) KMP_INTERNAL_FREE(arr);

#endif /* KMP_USE_HWLOC */

//
// Declare local char buffers with this size for printing debug and info
Expand Down Expand Up @@ -716,6 +806,9 @@ enum affinity_top_method {
affinity_top_method_group,
#endif /* KMP_GROUP_AFFINITY */
affinity_top_method_flat,
#if KMP_USE_HWLOC
affinity_top_method_hwloc,
#endif
affinity_top_method_default
};

Expand Down

0 comments on commit 01dcf36

Please sign in to comment.