diff --git a/README b/README index 8a136c06d08..7ac1d6875c1 100644 --- a/README +++ b/README @@ -17,7 +17,7 @@ Copyright (c) 2010 Oak Ridge National Labs. All rights reserved. Copyright (c) 2011 University of Houston. All rights reserved. Copyright (c) 2013-2017 Intel, Inc. All rights reserved. Copyright (c) 2015 NVIDIA Corporation. All rights reserved. -Copyright (c) 2017 Los Alamos National Security, LLC. All rights +Copyright (c) 2017-2018 Los Alamos National Security, LLC. All rights reserved. Copyright (c) 2017 Research Organization for Information Science and Technology (RIST). All rights reserved. @@ -143,10 +143,7 @@ General notes Platform Notes -------------- -- ARM and POWER users may experience intermittent hangs when Open MPI - is compiled with low optimization settings, due to an issue with our - atomic list implementation. We recommend compiling with -O3 - optimization, both for performance reasons and to avoid this hang. +- N/A Compiler Notes diff --git a/opal/class/opal_fifo.h b/opal/class/opal_fifo.h index ad9cbdbcbb4..4adf000eb9b 100644 --- a/opal/class/opal_fifo.h +++ b/opal/class/opal_fifo.h @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2007 Voltaire All rights reserved. * Copyright (c) 2010 IBM Corporation. All rights reserved. - * Copyright (c) 2014-2016 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2018 Los Alamos National Security, LLC. All rights * reseved. 
* $COPYRIGHT$ * @@ -186,9 +186,10 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo) static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo, opal_list_item_t *item) { + const opal_list_item_t * const ghost = &fifo->opal_fifo_ghost; opal_list_item_t *tail_item; - item->opal_list_next = &fifo->opal_fifo_ghost; + item->opal_list_next = (opal_list_item_t *) ghost; opal_atomic_wmb (); @@ -197,7 +198,7 @@ static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo, opal_atomic_wmb (); - if (&fifo->opal_fifo_ghost == tail_item) { + if (ghost == tail_item) { /* update the head */ fifo->opal_fifo_head.data.item = item; } else { @@ -215,14 +216,24 @@ static inline opal_list_item_t *opal_fifo_push_atomic (opal_fifo_t *fifo, */ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo) { - opal_list_item_t *item, *next; + const opal_list_item_t * const ghost = &fifo->opal_fifo_ghost; #if OPAL_HAVE_ATOMIC_LLSC_PTR + register opal_list_item_t *item, *next; + int attempt = 0, ret = 0; + /* use load-linked store-conditional to avoid ABA issues */ do { - item = opal_atomic_ll_ptr (&fifo->opal_fifo_head.data.item); - if (&fifo->opal_fifo_ghost == item) { - if (&fifo->opal_fifo_ghost == fifo->opal_fifo_tail.data.item) { + if (++attempt == 5) { + /* deliberately suspend this thread to allow other threads to run. This should + * only occur during periods of contention on the fifo.
*/ + _opal_lifo_release_cpu (); + attempt = 0; + } + + opal_atomic_ll_ptr(&fifo->opal_fifo_head.data.item, item); + if (ghost == item) { + if (ghost == fifo->opal_fifo_tail.data.item) { return NULL; } @@ -232,11 +243,12 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo) } next = (opal_list_item_t *) item->opal_list_next; - if (opal_atomic_sc_ptr (&fifo->opal_fifo_head.data.item, next)) { - break; - } - } while (1); + opal_atomic_sc_ptr(&fifo->opal_fifo_head.data.item, next, ret); + } while (!ret); + #else + opal_list_item_t *item, *next; + /* protect against ABA issues by "locking" the head */ do { if (opal_atomic_cmpset_32 ((int32_t *) &fifo->opal_fifo_head.data.counter, 0, 1)) { @@ -258,9 +270,9 @@ static inline opal_list_item_t *opal_fifo_pop_atomic (opal_fifo_t *fifo) fifo->opal_fifo_head.data.item = next; #endif - if (&fifo->opal_fifo_ghost == next) { - if (!opal_atomic_cmpset_ptr (&fifo->opal_fifo_tail.data.item, item, &fifo->opal_fifo_ghost)) { - while (&fifo->opal_fifo_ghost == item->opal_list_next) { + if (ghost == next) { + if (!opal_atomic_cmpset_ptr (&fifo->opal_fifo_tail.data.item, item, (void *) ghost)) { + while (ghost == item->opal_list_next) { opal_atomic_rmb (); } diff --git a/opal/class/opal_lifo.h b/opal/class/opal_lifo.h index af3dedd9272..c240280a836 100644 --- a/opal/class/opal_lifo.h +++ b/opal/class/opal_lifo.h @@ -12,7 +12,7 @@ * All rights reserved. * Copyright (c) 2007 Voltaire All rights reserved. * Copyright (c) 2010 IBM Corporation. All rights reserved. - * Copyright (c) 2014-2015 Los Alamos National Security, LLC. All rights + * Copyright (c) 2014-2018 Los Alamos National Security, LLC. All rights * reseved. * Copyright (c) 2016 Research Organization for Information Science * and Technology (RIST). All rights reserved. 
@@ -204,8 +204,8 @@ static inline void _opal_lifo_release_cpu (void) */ static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo) { - opal_list_item_t *item, *next; - int attempt = 0; + register opal_list_item_t *item, *next; + int attempt = 0, ret; do { if (++attempt == 5) { @@ -215,13 +215,14 @@ static inline opal_list_item_t *opal_lifo_pop_atomic (opal_lifo_t* lifo) attempt = 0; } - item = (opal_list_item_t *) opal_atomic_ll_ptr (&lifo->opal_lifo_head.data.item); + opal_atomic_ll_ptr(&lifo->opal_lifo_head.data.item, item); if (&lifo->opal_lifo_ghost == item) { return NULL; } next = (opal_list_item_t *) item->opal_list_next; - } while (!opal_atomic_sc_ptr (&lifo->opal_lifo_head.data.item, next)); + opal_atomic_sc_ptr(&lifo->opal_lifo_head.data.item, next, ret); + } while (!ret); opal_atomic_wmb (); diff --git a/opal/include/opal/sys/arm64/atomic.h b/opal/include/opal/sys/arm64/atomic.h index 2f7f7d32aac..4f295a5141a 100644 --- a/opal/include/opal/sys/arm64/atomic.h +++ b/opal/include/opal/sys/arm64/atomic.h @@ -150,28 +150,31 @@ static inline int opal_atomic_cmpset_rel_32(volatile int32_t *addr, return (ret == oldval); } -static inline int32_t opal_atomic_ll_32 (volatile int32_t *addr) -{ - int32_t ret; - - __asm__ __volatile__ ("ldaxr %w0, [%1] \n" - : "=&r" (ret) - : "r" (addr)); - - return ret; -} - -static inline int opal_atomic_sc_32 (volatile int32_t *addr, int32_t newval) -{ - int ret; - - __asm__ __volatile__ ("stlxr %w0, %w2, [%1] \n" - : "=&r" (ret) - : "r" (addr), "r" (newval) - : "cc", "memory"); - - return ret == 0; -} +#define opal_atomic_ll_32(addr, ret) \ + do { \ + volatile int32_t *_addr = (addr); \ + int32_t _ret; \ + \ + __asm__ __volatile__ ("ldaxr %w0, [%1] \n" \ + : "=&r" (_ret) \ + : "r" (_addr)); \ + \ + ret = (typeof(ret)) _ret; \ + } while (0) + +#define opal_atomic_sc_32(addr, newval, ret) \ + do { \ + volatile int32_t *_addr = (addr); \ + int32_t _newval = (int32_t) newval; \ + int _ret; \ + \ + __asm__ __volatile__ 
("stlxr %w0, %w2, [%1] \n" \ + : "=&r" (_ret) \ + : "r" (_addr), "r" (_newval) \ + : "cc", "memory"); \ + \ + ret = (_ret == 0); \ + } while (0) static inline int opal_atomic_cmpset_64(volatile int64_t *addr, int64_t oldval, int64_t newval) @@ -251,28 +254,31 @@ static inline int opal_atomic_cmpset_rel_64(volatile int64_t *addr, return (ret == oldval); } -static inline int64_t opal_atomic_ll_64 (volatile int64_t *addr) -{ - int64_t ret; - - __asm__ __volatile__ ("ldaxr %0, [%1] \n" - : "=&r" (ret) - : "r" (addr)); - - return ret; -} - -static inline int opal_atomic_sc_64 (volatile int64_t *addr, int64_t newval) -{ - int ret; - - __asm__ __volatile__ ("stlxr %w0, %2, [%1] \n" - : "=&r" (ret) - : "r" (addr), "r" (newval) - : "cc", "memory"); - - return ret == 0; -} +#define opal_atomic_ll_64(addr, ret) \ + do { \ + volatile int64_t *_addr = (addr); \ + int64_t _ret; \ + \ + __asm__ __volatile__ ("ldaxr %0, [%1] \n" \ + : "=&r" (_ret) \ + : "r" (_addr)); \ + \ + ret = (typeof(ret)) _ret; \ + } while (0) + +#define opal_atomic_sc_64(addr, newval, ret) \ + do { \ + volatile int64_t *_addr = (addr); \ + int64_t _newval = (int64_t) newval; \ + int _ret; \ + \ + __asm__ __volatile__ ("stlxr %w0, %2, [%1] \n" \ + : "=&r" (_ret) \ + : "r" (_addr), "r" (_newval) \ + : "cc", "memory"); \ + \ + ret = (_ret == 0); \ + } while (0) #define OPAL_ASM_MAKE_ATOMIC(type, bits, name, inst, reg) \ static inline type opal_atomic_ ## name ## _ ## bits (volatile type *addr, type value) \ diff --git a/opal/include/opal/sys/atomic_impl.h b/opal/include/opal/sys/atomic_impl.h index 09f9eab58f5..e9ecd527ece 100644 --- a/opal/include/opal/sys/atomic_impl.h +++ b/opal/include/opal/sys/atomic_impl.h @@ -278,15 +278,15 @@ static inline int opal_atomic_cmpset_rel_ptr(volatile void* addr, #if SIZEOF_VOID_P == 4 && OPAL_HAVE_ATOMIC_LLSC_32 -#define opal_atomic_ll_ptr(addr) (void *) opal_atomic_ll_32((int32_t *) addr) -#define opal_atomic_sc_ptr(addr, newval) opal_atomic_sc_32((int32_t *) addr, 
(int32_t) newval) +#define opal_atomic_ll_ptr(addr, ret) opal_atomic_ll_32((volatile int32_t *) (addr), ret) +#define opal_atomic_sc_ptr(addr, value, ret) opal_atomic_sc_32((volatile int32_t *) (addr), (intptr_t) (value), ret) #define OPAL_HAVE_ATOMIC_LLSC_PTR 1 #elif SIZEOF_VOID_P == 8 && OPAL_HAVE_ATOMIC_LLSC_64 -#define opal_atomic_ll_ptr(addr) (void *) opal_atomic_ll_64((int64_t *) addr) -#define opal_atomic_sc_ptr(addr, newval) opal_atomic_sc_64((int64_t *) addr, (int64_t) newval) +#define opal_atomic_ll_ptr(addr, ret) opal_atomic_ll_64((volatile int64_t *) (addr), ret) +#define opal_atomic_sc_ptr(addr, value, ret) opal_atomic_sc_64((volatile int64_t *) (addr), (intptr_t) (value), ret) #define OPAL_HAVE_ATOMIC_LLSC_PTR 1 diff --git a/opal/include/opal/sys/powerpc/atomic.h b/opal/include/opal/sys/powerpc/atomic.h index 060331ef95e..199a172eb4e 100644 --- a/opal/include/opal/sys/powerpc/atomic.h +++ b/opal/include/opal/sys/powerpc/atomic.h @@ -11,7 +11,7 @@ * Copyright (c) 2004-2005 The Regents of the University of California. * All rights reserved. * Copyright (c) 2010-2017 IBM Corporation. All rights reserved. - * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights + * Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights * reserved. * $COPYRIGHT$ * @@ -158,31 +158,35 @@ static inline int opal_atomic_cmpset_32(volatile int32_t *addr, return (ret == oldval); } -static inline int32_t opal_atomic_ll_32 (volatile int32_t *addr) -{ - int32_t ret; - - __asm__ __volatile__ ("lwarx %0, 0, %1 \n\t" - : "=&r" (ret) - : "r" (addr) - ); - return ret; -} - -static inline int opal_atomic_sc_32 (volatile int32_t *addr, int32_t newval) -{ - int32_t ret, foo; - - __asm__ __volatile__ (" stwcx. 
%4, 0, %3 \n\t" - " li %0,0 \n\t" - " bne- 1f \n\t" - " ori %0,%0,1 \n\t" - "1:" - : "=r" (ret), "=m" (*addr), "=r" (foo) - : "r" (addr), "r" (newval) - : "cc", "memory"); - return ret; -} +/* NTH: the LL/SC support is done through macros due to issues with non-optimized builds. The reason + * is that even with an always_inline attribute the compiler may still emit instructions to store then + * load the arguments to/from the stack. This sequence may cause the ll reservation to be cancelled. */ +#define opal_atomic_ll_32(addr, ret) \ + do { \ + volatile int32_t *_addr = (addr); \ + int32_t _ret; \ + __asm__ __volatile__ ("lwarx %0, 0, %1 \n\t" \ + : "=&r" (_ret) \ + : "r" (_addr) \ + ); \ + ret = (typeof(ret)) _ret; \ + } while (0) + +#define opal_atomic_sc_32(addr, value, ret) \ + do { \ + volatile int32_t *_addr = (addr); \ + int32_t _ret, _foo, _newval = (int32_t) value; \ + \ + __asm__ __volatile__ (" stwcx. %4, 0, %3 \n\t" \ + " li %0,0 \n\t" \ + " bne- 1f \n\t" \ + " ori %0,%0,1 \n\t" \ + "1:" \ + : "=r" (_ret), "=m" (*_addr), "=r" (_foo) \ + : "r" (_addr), "r" (_newval) \ + : "cc", "memory"); \ + ret = _ret; \ + } while (0) /* these two functions aren't inlined in the non-gcc case because then there would be two function calls (since neither cmpset_32 nor @@ -280,31 +284,33 @@ static inline int opal_atomic_cmpset_64(volatile int64_t *addr, return (ret == oldval); } -static inline int64_t opal_atomic_ll_64(volatile int64_t *addr) -{ - int64_t ret; - - __asm__ __volatile__ ("ldarx %0, 0, %1 \n\t" - : "=&r" (ret) - : "r" (addr) - ); - return ret; -} - -static inline int opal_atomic_sc_64(volatile int64_t *addr, int64_t newval) -{ - int32_t ret; - - __asm__ __volatile__ (" stdcx. 
%2, 0, %1 \n\t" - " li %0,0 \n\t" - " bne- 1f \n\t" - " ori %0,%0,1 \n\t" - "1:" - : "=r" (ret) - : "r" (addr), "r" (OPAL_ASM_VALUE64(newval)) - : "cc", "memory"); - return ret; -} +#define opal_atomic_ll_64(addr, ret) \ + do { \ + volatile int64_t *_addr = (addr); \ + int64_t _ret; \ + __asm__ __volatile__ ("ldarx %0, 0, %1 \n\t" \ + : "=&r" (_ret) \ + : "r" (_addr) \ + ); \ + ret = (typeof(ret)) _ret; \ + } while (0) + +#define opal_atomic_sc_64(addr, value, ret) \ + do { \ + volatile int64_t *_addr = (addr); \ + int64_t _foo, _newval = (int64_t) value; \ + int32_t _ret; \ + \ + __asm__ __volatile__ (" stdcx. %2, 0, %1 \n\t" \ + " li %0,0 \n\t" \ + " bne- 1f \n\t" \ + " ori %0,%0,1 \n\t" \ + "1:" \ + : "=r" (_ret) \ + : "r" (_addr), "r" (OPAL_ASM_VALUE64(_newval)) \ + : "cc", "memory"); \ + ret = _ret; \ + } while (0) /* these two functions aren't inlined in the non-gcc case because then there would be two function calls (since neither cmpset_64 nor diff --git a/opal/mca/patcher/base/patcher_base_patch.c b/opal/mca/patcher/base/patcher_base_patch.c index 8f731f8afb6..3d97bef6be2 100644 --- a/opal/mca/patcher/base/patcher_base_patch.c +++ b/opal/mca/patcher/base/patcher_base_patch.c @@ -106,6 +106,8 @@ static void flush_and_invalidate_cache (unsigned long a) __asm__ volatile("mfence;clflush %0;mfence" : :"m" (*(char*)a)); #elif OPAL_ASSEMBLY_ARCH == OPAL_IA64 __asm__ volatile ("fc %0;; sync.i;; srlz.i;;" : : "r"(a) : "memory"); +#elif OPAL_ASSEMBLY_ARCH == OPAL_ARM64 + __asm__ volatile ("dsb sy"); #endif } diff --git a/opal/mca/patcher/overwrite/configure.m4 b/opal/mca/patcher/overwrite/configure.m4 index 24494726851..50e5701723a 100644 --- a/opal/mca/patcher/overwrite/configure.m4 +++ b/opal/mca/patcher/overwrite/configure.m4 @@ -32,7 +32,7 @@ AC_DEFUN([MCA_opal_patcher_overwrite_CONFIG],[ if test $OPAL_ENABLE_DLOPEN_SUPPORT = 1; then # Disable ia64 for now. 
We can revive it later if anyone cares AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ -#if !defined(__i386__) && !defined(__x86_64__) && !defined(__PPC__) +#if !defined(__i386__) && !defined(__x86_64__) && !defined(__PPC__) && !defined(__aarch64__) #error "platform not supported" #endif ]],[])],[opal_patcher_overwrite_happy=yes],[]) diff --git a/opal/mca/patcher/overwrite/patcher_overwrite_module.c b/opal/mca/patcher/overwrite/patcher_overwrite_module.c index 210b8ae0e08..ef0bc2978f5 100644 --- a/opal/mca/patcher/overwrite/patcher_overwrite_module.c +++ b/opal/mca/patcher/overwrite/patcher_overwrite_module.c @@ -1,6 +1,6 @@ /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ /* - * Copyright (c) 2016 Los Alamos National Security, LLC. All rights + * Copyright (c) 2016-2018 Los Alamos National Security, LLC. All rights * reserved. * Copyright (c) 2016 IBM Corporation. All rights reserved. * Copyright (c) 2017 Research Organization for Information Science @@ -237,6 +237,77 @@ static int mca_patcher_overwrite_apply_patch (mca_patcher_base_patch_t *patch) return OPAL_SUCCESS; } +#elif defined(__aarch64__) + +/** + * @brief Generate a mov immediate instruction + * + * @param[in] reg register number (0-31) + * @param[in] shift shift amount (0-3) * 16-bits + * @param[in] value immediate value + */ +static uint32_t mov (unsigned int reg, uint16_t shift, uint16_t value) +{ + return (0x1a5 << 23) + ((uint32_t) shift << 21) + ((uint32_t) value << 5) + reg; +} + +/** + * @brief Generate a mov immediate with keep instruction + * + * @param[in] reg register number (0-31) + * @param[in] shift shift amount (0-3) * 16-bits + * @param[in] value immediate value + */ +static uint32_t movk (unsigned int reg, uint16_t shift, uint16_t value) +{ + return (0x1e5 << 23) + ((uint32_t) shift << 21) + ((uint32_t) value << 5) + reg; +} + +static uint32_t br (unsigned int reg) +{ + return (0xd61f << 16) + (reg << 5); +} + +static int +PatchLoadImm(uintptr_t addr, unsigned int reg, uint64_t 
value) +{ + *(uint32_t *) (addr + 0) = mov(reg, 3, value >> 48); + *(uint32_t *) (addr + 4) = movk(reg, 2, value >> 32); + *(uint32_t *) (addr + 8) = movk(reg, 1, value >> 16); + *(uint32_t *) (addr + 12) = movk(reg, 0, value); + return 16; +} + +static int mca_patcher_overwrite_apply_patch (mca_patcher_base_patch_t *patch) +{ + uintptr_t sys_addr, hook_addr; + int offset, rc; + + /* get system function address */ + sys_addr = mca_patcher_base_addr_text(patch->patch_orig); + hook_addr = mca_patcher_base_addr_text(patch->patch_value); + + /* Patch for hook function: */ + rc = mca_patcher_base_patch_hook (&mca_patcher_overwrite_module, hook_addr); + if (OPAL_SUCCESS != rc) { + return rc; + } + + /* Patch for system function: + * generate patch code + * r15 is the highest numbered temporary register. I am assuming this one is safe + * to use. */ + const unsigned int gr = 15; + offset = PatchLoadImm ((uintptr_t) patch->patch_data, gr, hook_addr); + *(uint32_t *) (patch->patch_data + offset) = br(gr); + patch->patch_data_size = offset + 4; + patch->patch_orig = sys_addr; + + mca_base_patcher_patch_apply_binary (patch); + + return OPAL_SUCCESS; +} + #endif static int mca_patcher_overwrite_patch_address (uintptr_t sys_addr, uintptr_t hook_addr)