From 621f46d272780c17af69fd0c062b9bd55f82e607 Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Tue, 31 May 2016 13:33:05 -0600 Subject: [PATCH 1/2] atomic: add support for __atomic builtins This commit adds support for the gcc __atomic builtins. The __sync builtins are deprecated and have been replaced by these atomics. In addition, the new atomics support atomic exchange which was not supported by __sync. Signed-off-by: Nathan Hjelm --- config/opal_config_asm.m4 | 83 ++++++++- opal/include/opal/sys/Makefile.am | 1 + opal/include/opal/sys/architecture.h | 3 +- opal/include/opal/sys/atomic.h | 2 + opal/include/opal/sys/gcc_builtin/Makefile.am | 25 +++ opal/include/opal/sys/gcc_builtin/atomic.h | 161 ++++++++++++++++++ 6 files changed, 273 insertions(+), 2 deletions(-) create mode 100644 opal/include/opal/sys/gcc_builtin/Makefile.am create mode 100644 opal/include/opal/sys/gcc_builtin/atomic.h diff --git a/config/opal_config_asm.m4 b/config/opal_config_asm.m4 index 0b35dd3a47b..646a3d49cfa 100644 --- a/config/opal_config_asm.m4 +++ b/config/opal_config_asm.m4 @@ -116,6 +116,84 @@ __sync_add_and_fetch(&tmp, 1);], ]) +AC_DEFUN([OPAL_CHECK_GCC_BUILTIN_CSWAP_INT128], [ + + OPAL_VAR_SCOPE_PUSH([atomic_compare_exchange_n_128_result CFLAGS_save]) + + AC_ARG_ENABLE([cross-cmpset128],[AC_HELP_STRING([--enable-cross-cmpset128], + [enable the use of the __sync builtin atomic compare-and-swap 128 when cross compiling])]) + + atomic_compare_exchange_n_128_result=0 + + if test ! "$enable_cross_cmpset128" = "yes" ; then + AC_MSG_CHECKING([for processor support of __atomic builtin atomic compare-and-swap on 128-bit values]) + + AC_RUN_IFELSE([AC_LANG_PROGRAM([], [__int128 x = 0, y = 0; __atomic_compare_exchange_n (&x, &y, 1, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);])], + [AC_MSG_RESULT([yes]) + atomic_compare_exchange_n_128_result=1], + [AC_MSG_RESULT([no])], + [AC_MSG_RESULT([no (cross compiling)])]) + + if test $atomic_compare_exchange_n_128_result = 0 ; then + CFLAGS_save=$CFLAGS + CFLAGS="$CFLAGS -mcx16" + + AC_MSG_CHECKING([for __atomic builtin atomic compare-and-swap on 128-bit values with -mcx16 flag]) + AC_RUN_IFELSE([AC_LANG_PROGRAM([], [__int128 x = 0, y = 0; __atomic_compare_exchange_n (&x, &y, 1, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED);])], + [AC_MSG_RESULT([yes]) + atomic_compare_exchange_n_128_result=1 + CFLAGS_save="$CFLAGS"], + [AC_MSG_RESULT([no])], + [AC_MSG_RESULT([no (cross compiling)])]) + + CFLAGS=$CFLAGS_save + fi + else + AC_MSG_CHECKING([for compiler support of __atomic builtin atomic compare-and-swap on 128-bit values]) + + # Check if the compiler supports the __atomic builtin + AC_TRY_LINK([], [__int128 x = 0; __atomic_bool_compare_and_swap (&x, 0, 1);], + [AC_MSG_RESULT([yes]) + atomic_compare_exchange_n_128_result=1], + [AC_MSG_RESULT([no])]) + + if test $atomic_compare_exchange_n_128_result = 0 ; then + CFLAGS_save=$CFLAGS + CFLAGS="$CFLAGS -mcx16" + + AC_MSG_CHECKING([for __atomic builtin atomic compare-and-swap on 128-bit values with -mcx16 flag]) + AC_TRY_LINK([], [__int128 x = 0; __atomic_bool_compare_and_swap (&x, 0, 1);], + [AC_MSG_RESULT([yes]) + atomic_compare_exchange_n_128_result=1 + CFLAGS_save="$CFLAGS"], + [AC_MSG_RESULT([no])]) + + CFLAGS=$CFLAGS_save + fi + fi + + AC_DEFINE_UNQUOTED([OPAL_HAVE_GCC_BUILTIN_CSWAP_INT128], [$atomic_compare_exchange_n_128_result], + [Whether the __atomic builtin atomic compare and swap supports 128-bit values]) + + OPAL_VAR_SCOPE_POP +]) + +AC_DEFUN([OPAL_CHECK_GCC_ATOMIC_BUILTINS], [ + AC_MSG_CHECKING([for __atomic builtin atomics]) + + AC_TRY_LINK([long tmp, old = 0;], [__atomic_thread_fence(__ATOMIC_SEQ_CST); +__atomic_compare_exchange_n(&tmp, &old, 1, 0, __ATOMIC_RELAXED, __ATOMIC_RELAXED); +__atomic_add_fetch(&tmp, 1, __ATOMIC_RELAXED);], + [AC_MSG_RESULT([yes]) + $1], + [AC_MSG_RESULT([no]) + $2]) + + # Check for 128-bit support + OPAL_CHECK_GCC_BUILTIN_CSWAP_INT128 +]) + + dnl ################################################################# dnl dnl OPAL_CHECK_ASM_TEXT @@ -894,6 +972,9 @@ AC_DEFUN([OPAL_CONFIG_ASM],[ [], [enable_osx_builtin_atomics="yes"]) opal_cv_asm_builtin="BUILTIN_NO" + if test "$opal_cv_asm_builtin" = "BUILTIN_NO" && test "$enable_builtin_atomics" = "yes" ; then + OPAL_CHECK_GCC_ATOMIC_BUILTINS([opal_cv_asm_builtin="BUILTIN_GCC"], []) + fi if test "$opal_cv_asm_builtin" = "BUILTIN_NO" && test "$enable_builtin_atomics" = "yes" ; then OPAL_CHECK_SYNC_BUILTINS([opal_cv_asm_builtin="BUILTIN_SYNC"], []) fi @@ -1032,7 +1113,7 @@ AC_MSG_ERROR([Can not continue.]) opal_cv_asm_builtin="BUILTIN_NO" fi - if test "$opal_cv_asm_builtin" = "BUILTIN_SYNC" ; then + if test "$opal_cv_asm_builtin" = "BUILTIN_SYNC" || test "$opal_cv_asm_builtin" = "BUILTIN_GCC" ; then AC_DEFINE([OPAL_C_GCC_INLINE_ASSEMBLY], [1], [Whether C compiler supports GCC style inline assembly]) else diff --git a/opal/include/opal/sys/Makefile.am b/opal/include/opal/sys/Makefile.am index 01985a9badc..36cf2536b29 100644 --- a/opal/include/opal/sys/Makefile.am +++ b/opal/include/opal/sys/Makefile.am @@ -36,3 +36,4 @@ include opal/sys/osx/Makefile.am include opal/sys/powerpc/Makefile.am include opal/sys/sparcv9/Makefile.am include opal/sys/sync_builtin/Makefile.am +include opal/sys/gcc_builtin/Makefile.am diff --git a/opal/include/opal/sys/architecture.h b/opal/include/opal/sys/architecture.h index ead36e64f9b..57d0107fc69 100644 --- a/opal/include/opal/sys/architecture.h +++ b/opal/include/opal/sys/architecture.h @@ -39,7 +39,8 @@ #define OPAL_ARM 0100 #define OPAL_BUILTIN_SYNC 0200 #define OPAL_BUILTIN_OSX 0201 -#define OPAL_BUILTIN_NO 0202 +#define OPAL_BUILTIN_GCC 0202 +#define OPAL_BUILTIN_NO 0203 /* Formats */ #define OPAL_DEFAULT 1000 /* standard for given architecture */ diff --git a/opal/include/opal/sys/atomic.h b/opal/include/opal/sys/atomic.h index 2a273722878..0102954fbb8 100644 --- a/opal/include/opal/sys/atomic.h +++ b/opal/include/opal/sys/atomic.h @@ -141,6 +141,8 @@ typedef struct opal_atomic_lock_t opal_atomic_lock_t; /* don't include system-level gorp when generating doxygen files */ #elif OPAL_ASSEMBLY_BUILTIN == OPAL_BUILTIN_SYNC #include "opal/sys/sync_builtin/atomic.h" +#elif OPAL_ASSEMBLY_BUILTIN == OPAL_BUILTIN_GCC +#include "opal/sys/gcc_builtin/atomic.h" #elif OPAL_ASSEMBLY_BUILTIN == OPAL_BUILTIN_OSX #include "opal/sys/osx/atomic.h" #elif OPAL_ASSEMBLY_ARCH == OPAL_AMD64 diff --git a/opal/include/opal/sys/gcc_builtin/Makefile.am b/opal/include/opal/sys/gcc_builtin/Makefile.am new file mode 100644 index 00000000000..a717786a5e9 --- /dev/null +++ b/opal/include/opal/sys/gcc_builtin/Makefile.am @@ -0,0 +1,25 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2009 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2011 Sandia National Laboratories. All rights reserved. +# Copyright (c) 2016 Los Alamos National Security, LLC. All rights +# reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# This makefile.am does not stand on its own - it is included from opal/include/Makefile.am + +headers += \ + opal/sys/gcc_builtin/atomic.h diff --git a/opal/include/opal/sys/gcc_builtin/atomic.h b/opal/include/opal/sys/gcc_builtin/atomic.h new file mode 100644 index 00000000000..8ad2cfa53bf --- /dev/null +++ b/opal/include/opal/sys/gcc_builtin/atomic.h @@ -0,0 +1,161 @@ +/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ +/* + * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2004-2013 The University of Tennessee and The University + * of Tennessee Research Foundation. All rights + * reserved. + * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, + * University of Stuttgart. All rights reserved. + * Copyright (c) 2004-2005 The Regents of the University of California. + * All rights reserved. + * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. + * Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights + * reserved. + * $COPYRIGHT$ + * + * Additional copyrights may follow + * + * $HEADER$ + */ + +#ifndef OPAL_SYS_ARCH_ATOMIC_H +#define OPAL_SYS_ARCH_ATOMIC_H 1 + +#include + +/********************************************************************** + * + * Memory Barriers + * + *********************************************************************/ +#define OPAL_HAVE_ATOMIC_MEM_BARRIER 1 + +#define OPAL_HAVE_ATOMIC_MATH_32 1 +#define OPAL_HAVE_ATOMIC_CMPSET_32 1 +#define OPAL_HAVE_ATOMIC_ADD_32 1 +#define OPAL_HAVE_ATOMIC_SUB_32 1 +#define OPAL_HAVE_ATOMIC_SWAP_32 1 +#define OPAL_HAVE_ATOMIC_MATH_64 1 +#define OPAL_HAVE_ATOMIC_CMPSET_64 1 +#define OPAL_HAVE_ATOMIC_ADD_64 1 +#define OPAL_HAVE_ATOMIC_SUB_64 1 +#define OPAL_HAVE_ATOMIC_SWAP_64 1 + + +static inline void opal_atomic_mb(void) +{ + __atomic_thread_fence (__ATOMIC_SEQ_CST); +} + +static inline void opal_atomic_rmb(void) +{ + __atomic_thread_fence (__ATOMIC_ACQUIRE); +} + +static inline void opal_atomic_wmb(void) +{ + __atomic_thread_fence (__ATOMIC_RELEASE); +} + +#define MB() opal_atomic_mb() + +/********************************************************************** + * + * Atomic math operations + * + *********************************************************************/ + +static inline int opal_atomic_cmpset_acq_32( volatile int32_t *addr, + int32_t oldval, int32_t newval) +{ + return __atomic_compare_exchange_n (addr, &oldval, newval, false, + __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); +} + + +static inline int opal_atomic_cmpset_rel_32( volatile int32_t *addr, + int32_t oldval, int32_t newval) +{ + return __atomic_compare_exchange_n (addr, &oldval, newval, false, + __ATOMIC_RELEASE, __ATOMIC_RELAXED); +} + +static inline int opal_atomic_cmpset_32( volatile int32_t *addr, + int32_t oldval, int32_t newval) +{ + return __atomic_compare_exchange_n (addr, &oldval, newval, false, + __ATOMIC_RELAXED, __ATOMIC_RELAXED); +} + +static inline int32_t opal_atomic_swap_32 (volatile int32_t *addr, int32_t newval) +{ + int32_t oldval; + __atomic_exchange (addr, &newval, &oldval, __ATOMIC_RELAXED); + return oldval; +} + +static inline int32_t opal_atomic_add_32(volatile int32_t *addr, int32_t delta) +{ + return __atomic_add_fetch (addr, delta, __ATOMIC_RELAXED); +} + +static inline int32_t opal_atomic_sub_32(volatile int32_t *addr, int32_t delta) +{ + return __atomic_sub_fetch (addr, delta, __ATOMIC_RELAXED); +} + +static inline int opal_atomic_cmpset_acq_64( volatile int64_t *addr, + int64_t oldval, int64_t newval) +{ + return __atomic_compare_exchange_n (addr, &oldval, newval, false, + __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); +} + +static inline int opal_atomic_cmpset_rel_64( volatile int64_t *addr, + int64_t oldval, int64_t newval) +{ + return __atomic_compare_exchange_n (addr, &oldval, newval, false, + __ATOMIC_RELEASE, __ATOMIC_RELAXED); +} + + +static inline int opal_atomic_cmpset_64( volatile int64_t *addr, + int64_t oldval, int64_t newval) +{ + return __atomic_compare_exchange_n (addr, &oldval, newval, false, + __ATOMIC_RELAXED, __ATOMIC_RELAXED); +} + +static inline int64_t opal_atomic_swap_64 (volatile int64_t *addr, int64_t newval) +{ + int64_t oldval; + __atomic_exchange (addr, &newval, &oldval, __ATOMIC_RELAXED); + return oldval; +} + +static inline int64_t opal_atomic_add_64(volatile int64_t *addr, int64_t delta) +{ + return __atomic_add_fetch (addr, delta, __ATOMIC_RELAXED); +} + +static inline int64_t opal_atomic_sub_64(volatile int64_t *addr, int64_t delta) +{ + return __atomic_sub_fetch (addr, delta, __ATOMIC_RELAXED); +} + +#if OPAL_HAVE_GCC_BUILTIN_CSWAP_INT128 + +#define OPAL_HAVE_ATOMIC_CMPSET_128 1 + +static inline int opal_atomic_cmpset_128 (volatile opal_int128_t *addr, + opal_int128_t oldval, opal_int128_t newval) +{ + return __atomic_compare_exchange_n (addr, &oldval, newval, false, + __ATOMIC_RELAXED, __ATOMIC_RELAXED); +} + +#endif + +#endif /* ! OPAL_SYS_ARCH_ATOMIC_H */ From bea9c8a8d64d4bdca255f833b63a80eaf2222af8 Mon Sep 17 00:00:00 2001 From: Nathan Hjelm Date: Tue, 31 May 2016 13:57:44 -0600 Subject: [PATCH 2/2] atomic: add support for transactional memory This commit adds support for using transactional memory when using opal atomic locks. This feature is enabled if the __HLE__ feature is available and the gcc builtin atomics are in use. Signed-off-by: Nathan Hjelm --- opal/include/opal/sys/atomic.h | 17 ++++---- opal/include/opal/sys/gcc_builtin/atomic.h | 47 ++++++++++++++++++++-- 2 files changed, 52 insertions(+), 12 deletions(-) diff --git a/opal/include/opal/sys/atomic.h b/opal/include/opal/sys/atomic.h index 0102954fbb8..6bebcc8b845 100644 --- a/opal/include/opal/sys/atomic.h +++ b/opal/include/opal/sys/atomic.h @@ -131,6 +131,14 @@ typedef struct opal_atomic_lock_t opal_atomic_lock_t; #define OPAL_HAVE_INLINE_ATOMIC_SWAP_64 1 #endif +/** + * Enumeration of lock states + */ +enum { + OPAL_ATOMIC_UNLOCKED = 0, + OPAL_ATOMIC_LOCKED = 1 +}; + /********************************************************************** * * Load the appropriate architecture files and set some reasonable @@ -266,15 +274,6 @@ void opal_atomic_wmb(void); #if defined(DOXYGEN) || OPAL_HAVE_ATOMIC_SPINLOCKS || (OPAL_HAVE_ATOMIC_CMPSET_32 || OPAL_HAVE_ATOMIC_CMPSET_64) -/** - * Enumeration of lock states - */ -enum { - OPAL_ATOMIC_UNLOCKED = 0, - OPAL_ATOMIC_LOCKED = 1 -}; - - /** * Initialize a lock to value * diff --git a/opal/include/opal/sys/gcc_builtin/atomic.h b/opal/include/opal/sys/gcc_builtin/atomic.h index 8ad2cfa53bf..db9e8ad9d97 100644 --- a/opal/include/opal/sys/gcc_builtin/atomic.h +++ b/opal/include/opal/sys/gcc_builtin/atomic.h @@ -86,7 +86,7 @@ static inline int opal_atomic_cmpset_32( volatile int32_t *addr, int32_t oldval, int32_t newval) { return __atomic_compare_exchange_n (addr, &oldval, newval, false, - __ATOMIC_RELAXED, __ATOMIC_RELAXED); + __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); } static inline int32_t opal_atomic_swap_32 (volatile int32_t *addr, int32_t newval) @@ -125,7 +125,7 @@ static inline int opal_atomic_cmpset_64( volatile int64_t *addr, int64_t oldval, int64_t newval) { return __atomic_compare_exchange_n (addr, &oldval, newval, false, - __ATOMIC_RELAXED, __ATOMIC_RELAXED); + __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); } static inline int64_t opal_atomic_swap_64 (volatile int64_t *addr, int64_t newval) @@ -153,7 +153,48 @@ static inline int opal_atomic_cmpset_128 (volatile opal_int128_t *addr, opal_int128_t oldval, opal_int128_t newval) { return __atomic_compare_exchange_n (addr, &oldval, newval, false, - __ATOMIC_RELAXED, __ATOMIC_RELAXED); + __ATOMIC_ACQUIRE, __ATOMIC_RELAXED); +} + +#endif + +#if defined(__HLE__) + +#include + +#define OPAL_HAVE_ATOMIC_SPINLOCKS 1 + +static inline void opal_atomic_init (opal_atomic_lock_t* lock, int32_t value) +{ + lock->u.lock = value; +} + +static inline int opal_atomic_trylock(opal_atomic_lock_t *lock) +{ + int ret = __atomic_exchange_n (&lock->u.lock, OPAL_ATOMIC_LOCKED, + __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE); + if (OPAL_ATOMIC_LOCKED == ret) { + /* abort the transaction */ + _mm_pause (); + return 1; + } + + return 0; +} + +static inline void opal_atomic_lock (opal_atomic_lock_t *lock) +{ + while (OPAL_ATOMIC_LOCKED == __atomic_exchange_n (&lock->u.lock, OPAL_ATOMIC_LOCKED, + __ATOMIC_ACQUIRE | __ATOMIC_HLE_ACQUIRE)) { + /* abort the transaction */ + _mm_pause (); + } +} + +static inline void opal_atomic_unlock (opal_atomic_lock_t *lock) +{ + __atomic_store_n (&lock->u.lock, OPAL_ATOMIC_UNLOCKED, + __ATOMIC_RELEASE | __ATOMIC_HLE_RELEASE); } #endif